diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2026-04-20 04:28:57 +0300 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2026-04-20 04:28:57 +0300 |
| commit | f4b369c6fe0ceaba2da2daff8c9eb415f85926dd (patch) | |
| tree | 30465d0a429b2c224685b5d8e804bf053c4d129a /tools/lib | |
| parent | ff14dafde15c11403fac61367a34fea08926e9ee (diff) | |
| parent | 2ca45e57ea027fffe3350ae5e21ad9cecb0dce74 (diff) | |
| download | linux-f4b369c6fe0ceaba2da2daff8c9eb415f85926dd.tar.xz | |
Merge branch 'next' into for-linus
Prepare input updates for 7.1 merge window.
Diffstat (limited to 'tools/lib')
46 files changed, 7794 insertions, 177 deletions
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index 51255c69754d..aa83d22c45e3 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -140,3 +140,32 @@ void __bitmap_clear(unsigned long *map, unsigned int start, int len) *p &= ~mask_to_clear; } } + +bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k; + unsigned int lim = bits/BITS_PER_LONG; + unsigned long result = 0; + + for (k = 0; k < lim; k++) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); + return result != 0; +} + +bool __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] & ~bitmap2[k]) + return false; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return false; + return true; +} diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 339b19797237..5846de364209 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -154,7 +154,7 @@ int bump_rlimit_memlock(void) memlock_bumped = true; - /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ + /* zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */ if (memlock_rlim == 0) return 0; @@ -794,6 +794,7 @@ int bpf_link_create(int prog_fd, int target_fd, case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: + case BPF_TRACE_FSESSION: case BPF_LSM_MAC: attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0); if (!OPTS_ZEROED(opts, tracing)) @@ -1397,3 +1398,22 @@ int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, err = sys_bpf(BPF_PROG_STREAM_READ_BY_FD, &attr, attr_sz); return libbpf_err_errno(err); } + +int bpf_prog_assoc_struct_ops(int prog_fd, int map_fd, + struct 
bpf_prog_assoc_struct_ops_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_assoc_struct_ops); + union bpf_attr attr; + int err; + + if (!OPTS_VALID(opts, bpf_prog_assoc_struct_ops_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.prog_assoc_struct_ops.map_fd = map_fd; + attr.prog_assoc_struct_ops.prog_fd = prog_fd; + attr.prog_assoc_struct_ops.flags = OPTS_GET(opts, flags, 0); + + err = sys_bpf(BPF_PROG_ASSOC_STRUCT_OPS, &attr, attr_sz); + return libbpf_err_errno(err); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index e983a3e40d61..2c8e88ddb674 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -289,6 +289,14 @@ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, * Update spin_lock-ed map elements. This must be * specified if the map value contains a spinlock. * + * **BPF_F_CPU** + * As for percpu maps, update value on the specified CPU. And the cpu + * info is embedded into the high 32 bits of **opts->elem_flags**. + * + * **BPF_F_ALL_CPUS** + * As for percpu maps, update value across all CPUs. This flag cannot + * be used with BPF_F_CPU at the same time. + * * @param fd BPF map file descriptor * @param keys pointer to an array of *count* keys * @param values pointer to an array of *count* values @@ -733,6 +741,27 @@ struct bpf_prog_stream_read_opts { LIBBPF_API int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, struct bpf_prog_stream_read_opts *opts); +struct bpf_prog_assoc_struct_ops_opts { + size_t sz; + __u32 flags; + size_t :0; +}; +#define bpf_prog_assoc_struct_ops_opts__last_field flags + +/** + * @brief **bpf_prog_assoc_struct_ops** associates a BPF program with a + * struct_ops map. 
+ * + * @param prog_fd FD for the BPF program + * @param map_fd FD for the struct_ops map to be associated with the BPF program + * @param opts optional options, can be NULL + * + * @return 0 on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_prog_assoc_struct_ops(int prog_fd, int map_fd, + struct bpf_prog_assoc_struct_ops_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index d4e4e388e625..9d160b5b9c0e 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -315,9 +315,6 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) -extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args, - __u32 len__sz, void *aux__prog) __weak __ksym; - #define bpf_stream_printk(stream_id, fmt, args...) \ ({ \ static const char ___fmt[] = fmt; \ @@ -328,7 +325,7 @@ extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const vo ___bpf_fill(___param, args); \ _Pragma("GCC diagnostic pop") \ \ - bpf_stream_vprintk_impl(stream_id, ___fmt, ___param, sizeof(___param), NULL); \ + bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param)); \ }) /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 18907f0fcf9f..83fe79ffcb8f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -92,6 +92,8 @@ struct btf { * - for split BTF counts number of types added on top of base BTF. 
*/ __u32 nr_types; + /* the start IDs of named types in sorted BTF */ + int named_start_id; /* if not NULL, points to the base BTF on top of which the current * split BTF is based */ @@ -897,46 +899,105 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) return type_id; } -__s32 btf__find_by_name(const struct btf *btf, const char *type_name) +static void btf_check_sorted(struct btf *btf) { - __u32 i, nr_types = btf__type_cnt(btf); + __u32 i, n, named_start_id = 0; - if (!strcmp(type_name, "void")) - return 0; + n = btf__type_cnt(btf); + for (i = btf->start_id + 1; i < n; i++) { + struct btf_type *ta = btf_type_by_id(btf, i - 1); + struct btf_type *tb = btf_type_by_id(btf, i); + const char *na = btf__str_by_offset(btf, ta->name_off); + const char *nb = btf__str_by_offset(btf, tb->name_off); - for (i = 1; i < nr_types; i++) { - const struct btf_type *t = btf__type_by_id(btf, i); - const char *name = btf__name_by_offset(btf, t->name_off); + if (strcmp(na, nb) > 0) + return; - if (name && !strcmp(type_name, name)) - return i; + if (named_start_id == 0 && na[0] != '\0') + named_start_id = i - 1; + if (named_start_id == 0 && nb[0] != '\0') + named_start_id = i; } - return libbpf_err(-ENOENT); + if (named_start_id) + btf->named_start_id = named_start_id; +} + +static __s32 btf_find_type_by_name_bsearch(const struct btf *btf, const char *name, + __s32 start_id) +{ + const struct btf_type *t; + const char *tname; + __s32 l, r, m; + + l = start_id; + r = btf__type_cnt(btf) - 1; + while (l <= r) { + m = l + (r - l) / 2; + t = btf_type_by_id(btf, m); + tname = btf__str_by_offset(btf, t->name_off); + if (strcmp(tname, name) >= 0) { + if (l == r) + return r; + r = m; + } else { + l = m + 1; + } + } + + return btf__type_cnt(btf); } static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, - const char *type_name, __u32 kind) + const char *type_name, __s32 kind) { - __u32 i, nr_types = btf__type_cnt(btf); + __u32 nr_types = btf__type_cnt(btf); + const 
struct btf_type *t; + const char *tname; + __s32 id; - if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) - return 0; + if (start_id < btf->start_id) { + id = btf_find_by_name_kind(btf->base_btf, start_id, + type_name, kind); + if (id >= 0) + return id; + start_id = btf->start_id; + } - for (i = start_id; i < nr_types; i++) { - const struct btf_type *t = btf__type_by_id(btf, i); - const char *name; + if (kind == BTF_KIND_UNKN || strcmp(type_name, "void") == 0) + return 0; - if (btf_kind(t) != kind) - continue; - name = btf__name_by_offset(btf, t->name_off); - if (name && !strcmp(type_name, name)) - return i; + if (btf->named_start_id > 0 && type_name[0]) { + start_id = max(start_id, btf->named_start_id); + id = btf_find_type_by_name_bsearch(btf, type_name, start_id); + for (; id < nr_types; id++) { + t = btf__type_by_id(btf, id); + tname = btf__str_by_offset(btf, t->name_off); + if (strcmp(tname, type_name) != 0) + return libbpf_err(-ENOENT); + if (kind < 0 || btf_kind(t) == kind) + return id; + } + } else { + for (id = start_id; id < nr_types; id++) { + t = btf_type_by_id(btf, id); + if (kind > 0 && btf_kind(t) != kind) + continue; + tname = btf__str_by_offset(btf, t->name_off); + if (strcmp(tname, type_name) == 0) + return id; + } } return libbpf_err(-ENOENT); } +/* the kind value of -1 indicates that kind matching should be skipped */ +__s32 btf__find_by_name(const struct btf *btf, const char *type_name) +{ + return btf_find_by_name_kind(btf, 1, type_name, -1); +} + __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, __u32 kind) { @@ -1006,6 +1067,7 @@ static struct btf *btf_new_empty(struct btf *base_btf) btf->fd = -1; btf->ptr_sz = sizeof(void *); btf->swapped_endian = false; + btf->named_start_id = 0; if (base_btf) { btf->base_btf = base_btf; @@ -1057,11 +1119,12 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b btf->start_id = 1; btf->start_str_off = 0; btf->fd = -1; + btf->named_start_id = 
0; if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; } if (is_mmap) { @@ -1091,6 +1154,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b err = err ?: btf_sanity_check(btf); if (err) goto done; + btf_check_sorted(btf); done: if (err) { @@ -1715,6 +1779,7 @@ static void btf_invalidate_raw_data(struct btf *btf) free(btf->raw_data_swapped); btf->raw_data_swapped = NULL; } + btf->named_start_id = 0; } /* Ensure BTF is ready to be modified (by splitting into a three memory @@ -2069,7 +2134,7 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding int sz, name_off; /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); /* byte_sz must be power of 2 */ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16) @@ -2117,7 +2182,7 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz) int sz, name_off; /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); /* byte_sz must be one of the explicitly allowed values */ @@ -2172,7 +2237,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref if (!t) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 0) return name_off; @@ -2249,7 +2314,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 if (!t) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 0) return name_off; @@ -2350,7 +2415,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, if (!m) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 
0) return name_off; @@ -2388,7 +2453,7 @@ static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz, if (!t) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 0) return name_off; @@ -2446,7 +2511,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) return libbpf_err(-EINVAL); /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); if (value < INT_MIN || value > UINT_MAX) return libbpf_err(-E2BIG); @@ -2523,7 +2588,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) return libbpf_err(-EINVAL); /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ @@ -2563,7 +2628,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) */ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) { - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); switch (fwd_kind) { @@ -2599,7 +2664,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) */ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) { - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id, 0); @@ -2651,7 +2716,7 @@ int btf__add_restrict(struct btf *btf, int ref_type_id) */ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) { - if (!value || !value[0]) + if (str_is_empty(value)) return libbpf_err(-EINVAL); return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 0); @@ -2668,7 +2733,7 @@ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) */ int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id) { - if (!value || !value[0]) + if (str_is_empty(value)) return 
libbpf_err(-EINVAL); return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 1); @@ -2687,7 +2752,7 @@ int btf__add_func(struct btf *btf, const char *name, { int id; - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL && linkage != BTF_FUNC_EXTERN) @@ -2773,7 +2838,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) if (!p) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 0) return name_off; @@ -2808,7 +2873,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) int sz, name_off; /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED && linkage != BTF_VAR_GLOBAL_EXTERN) @@ -2857,7 +2922,7 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) int sz, name_off; /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); if (btf_ensure_modifiable(btf)) @@ -2934,7 +2999,7 @@ static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id, struct btf_type *t; int sz, value_off; - if (!value || !value[0] || component_idx < -1) + if (str_is_empty(value) || component_idx < -1) return libbpf_err(-EINVAL); if (validate_type_id(ref_type_id)) @@ -3901,6 +3966,20 @@ err_out: return err; } +/* + * Calculate type signature hash of TYPEDEF, ignoring referenced type IDs, + * as referenced type IDs equivalence is established separately during type + * graph equivalence check algorithm. 
+ */ +static long btf_hash_typedef(struct btf_type *t) +{ + long h; + + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info); + return h; +} + static long btf_hash_common(struct btf_type *t) { long h; @@ -3918,6 +3997,13 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) t1->size == t2->size; } +/* Check structural compatibility of two TYPEDEF. */ +static bool btf_equal_typedef(struct btf_type *t1, struct btf_type *t2) +{ + return t1->name_off == t2->name_off && + t1->info == t2->info; +} + /* Calculate type signature hash of INT or TAG. */ static long btf_hash_int_decl_tag(struct btf_type *t) { @@ -4410,11 +4496,14 @@ static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, struct btf_type *t1, *t2; int k1, k2; recur: - if (depth <= 0) - return false; - t1 = btf_type_by_id(d->btf, id1); t2 = btf_type_by_id(d->btf, id2); + if (depth <= 0) { + pr_debug("Reached depth limit for identical type comparison for '%s'/'%s'\n", + btf__name_by_offset(d->btf, t1->name_off), + btf__name_by_offset(d->btf, t2->name_off)); + return false; + } k1 = btf_kind(t1); k2 = btf_kind(t2); @@ -4476,8 +4565,16 @@ recur: for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { if (m1->type == m2->type) continue; - if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) + if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) { + if (t1->name_off) { + pr_debug("%s '%s' size=%d vlen=%d id1[%u] id2[%u] shallow-equal but not identical for field#%d '%s'\n", + k1 == BTF_KIND_STRUCT ? 
"STRUCT" : "UNION", + btf__name_by_offset(d->btf, t1->name_off), + t1->size, btf_vlen(t1), id1, id2, i, + btf__name_by_offset(d->btf, m1->name_off)); + } return false; + } } return true; } @@ -4718,8 +4815,16 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, canon_m = btf_members(canon_type); for (i = 0; i < vlen; i++) { eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); - if (eq <= 0) + if (eq <= 0) { + if (cand_type->name_off) { + pr_debug("%s '%s' size=%d vlen=%d cand_id[%u] canon_id[%u] shallow-equal but not equiv for field#%d '%s': %d\n", + cand_kind == BTF_KIND_STRUCT ? "STRUCT" : "UNION", + btf__name_by_offset(d->btf, cand_type->name_off), + cand_type->size, vlen, cand_id, canon_id, i, + btf__name_by_offset(d->btf, cand_m->name_off), eq); + } return eq; + } cand_m++; canon_m++; } @@ -4844,13 +4949,30 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) } } +static inline long btf_hash_by_kind(struct btf_type *t, __u16 kind) +{ + if (kind == BTF_KIND_TYPEDEF) + return btf_hash_typedef(t); + else + return btf_hash_struct(t); +} + +static inline bool btf_equal_by_kind(struct btf_type *t1, struct btf_type *t2, __u16 kind) +{ + if (kind == BTF_KIND_TYPEDEF) + return btf_equal_typedef(t1, t2); + else + return btf_shallow_equal_struct(t1, t2); +} + /* - * Deduplicate struct/union types. + * Deduplicate struct/union and typedef types. * * For each struct/union type its type signature hash is calculated, taking * into account type's name, size, number, order and names of fields, but * ignoring type ID's referenced from fields, because they might not be deduped - * completely until after reference types deduplication phase. This type hash + * completely until after reference types deduplication phase. For each typedef + * type, the hash is computed based on the type’s name and size. This type hash * is used to iterate over all potential canonical types, sharing same hash. 
* For each canonical candidate we check whether type graphs that they form * (through referenced types in fields and so on) are equivalent using algorithm @@ -4882,18 +5004,20 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) t = btf_type_by_id(d->btf, type_id); kind = btf_kind(t); - if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + if (kind != BTF_KIND_STRUCT && + kind != BTF_KIND_UNION && + kind != BTF_KIND_TYPEDEF) return 0; - h = btf_hash_struct(t); + h = btf_hash_by_kind(t, kind); for_each_dedup_cand(d, hash_entry, h) { __u32 cand_id = hash_entry->value; int eq; /* * Even though btf_dedup_is_equiv() checks for - * btf_shallow_equal_struct() internally when checking two - * structs (unions) for equivalence, we need to guard here + * btf_equal_by_kind() internally when checking two + * structs (unions) or typedefs for equivalence, we need to guard here * from picking matching FWD type as a dedup candidate. * This can happen due to hash collision. In such case just * relying on btf_dedup_is_equiv() would lead to potentially @@ -4901,7 +5025,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) * FWD and compatible STRUCT/UNION are considered equivalent. */ cand_type = btf_type_by_id(d->btf, cand_id); - if (!btf_shallow_equal_struct(t, cand_type)) + if (!btf_equal_by_kind(t, cand_type, kind)) continue; btf_dedup_clear_hypot_map(d); @@ -4939,18 +5063,18 @@ static int btf_dedup_struct_types(struct btf_dedup *d) /* * Deduplicate reference type. * - * Once all primitive and struct/union types got deduplicated, we can easily + * Once all primitive, struct/union and typedef types got deduplicated, we can easily * deduplicate all other (reference) BTF types. This is done in two steps: * * 1. Resolve all referenced type IDs into their canonical type IDs. 
This - * resolution can be done either immediately for primitive or struct/union types - * (because they were deduped in previous two phases) or recursively for + * resolution can be done either immediately for primitive, struct/union, and typedef + * types (because they were deduped in previous two phases) or recursively for * reference types. Recursion will always terminate at either primitive or - * struct/union type, at which point we can "unwind" chain of reference types - * one by one. There is no danger of encountering cycles because in C type - * system the only way to form type cycle is through struct/union, so any chain - * of reference types, even those taking part in a type cycle, will inevitably - * reach struct/union at some point. + * struct/union and typedef types, at which point we can "unwind" chain of reference + * types one by one. There is no danger of encountering cycles in C, as the only way to + * form a type cycle is through struct or union types. Go can form such cycles through + * typedef. Thus, any chain of reference types, even those taking part in a type cycle, + * will inevitably reach a struct/union or typedef type at some point. * * 2. 
Once all referenced type IDs are resolved into canonical ones, BTF type * becomes "stable", in the sense that no further deduplication will cause @@ -4982,7 +5106,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_TYPE_TAG: ref_type_id = btf_dedup_ref_type(d, t->type); @@ -5818,7 +5941,7 @@ void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) { btf->base_btf = (struct btf *)base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; } int btf__relocate(struct btf *btf, const struct btf *base_btf) @@ -5829,3 +5952,136 @@ int btf__relocate(struct btf *btf, const struct btf *base_btf) btf->owns_base = false; return libbpf_err(err); } + +struct btf_permute { + struct btf *btf; + __u32 *id_map; + __u32 start_offs; +}; + +/* Callback function to remap individual type ID references */ +static int btf_permute_remap_type_id(__u32 *type_id, void *ctx) +{ + struct btf_permute *p = ctx; + __u32 new_id = *type_id; + + /* refer to the base BTF or VOID type */ + if (new_id < p->btf->start_id) + return 0; + + if (new_id >= btf__type_cnt(p->btf)) + return -EINVAL; + + *type_id = p->id_map[new_id - p->btf->start_id + p->start_offs]; + return 0; +} + +int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt, + const struct btf_permute_opts *opts) +{ + struct btf_permute p; + struct btf_ext *btf_ext; + void *nt, *new_types = NULL; + __u32 *order_map = NULL; + int err = 0, i; + __u32 n, id, start_offs = 0; + + if (!OPTS_VALID(opts, btf_permute_opts)) + return libbpf_err(-EINVAL); + + if (btf__base_btf(btf)) { + n = btf->nr_types; + } else { + if (id_map[0] != 0) + return libbpf_err(-EINVAL); + n = btf__type_cnt(btf); + start_offs = 1; + } + + if (id_map_cnt != n) + return libbpf_err(-EINVAL); + + /* record the 
sequence of types */ + order_map = calloc(id_map_cnt, sizeof(*id_map)); + if (!order_map) { + err = -ENOMEM; + goto done; + } + + new_types = calloc(btf->hdr->type_len, 1); + if (!new_types) { + err = -ENOMEM; + goto done; + } + + if (btf_ensure_modifiable(btf)) { + err = -ENOMEM; + goto done; + } + + for (i = start_offs; i < id_map_cnt; i++) { + id = id_map[i]; + if (id < btf->start_id || id >= btf__type_cnt(btf)) { + err = -EINVAL; + goto done; + } + id -= btf->start_id - start_offs; + /* cannot be mapped to the same ID */ + if (order_map[id]) { + err = -EINVAL; + goto done; + } + order_map[id] = i + btf->start_id - start_offs; + } + + p.btf = btf; + p.id_map = id_map; + p.start_offs = start_offs; + nt = new_types; + for (i = start_offs; i < id_map_cnt; i++) { + struct btf_field_iter it; + const struct btf_type *t; + __u32 *type_id; + int type_size; + + id = order_map[i]; + t = btf__type_by_id(btf, id); + type_size = btf_type_size(t); + memcpy(nt, t, type_size); + + /* fix up referenced IDs for BTF */ + err = btf_field_iter_init(&it, nt, BTF_FIELD_ITER_IDS); + if (err) + goto done; + while ((type_id = btf_field_iter_next(&it))) { + err = btf_permute_remap_type_id(type_id, &p); + if (err) + goto done; + } + + nt += type_size; + } + + /* fix up referenced IDs for btf_ext */ + btf_ext = OPTS_GET(opts, btf_ext, NULL); + if (btf_ext) { + err = btf_ext_visit_type_ids(btf_ext, btf_permute_remap_type_id, &p); + if (err) + goto done; + } + + for (nt = new_types, i = 0; i < id_map_cnt - start_offs; i++) { + btf->type_offs[i] = nt - new_types; + nt += btf_type_size(nt); + } + + free(order_map); + free(btf->types_data); + btf->types_data = new_types; + return 0; + +done: + free(order_map); + free(new_types); + return libbpf_err(err); +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index ccfd905f03df..b30008c267c0 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -94,6 +94,7 @@ LIBBPF_API struct btf *btf__new_empty(void); * @brief 
**btf__new_empty_split()** creates an unpopulated BTF object from an * ELF BTF section except with a base BTF on top of which split BTF should be * based + * @param base_btf base BTF object * @return new BTF object instance which has to be eventually freed with * **btf__free()** * @@ -115,6 +116,10 @@ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); * When that split BTF is loaded against a (possibly changed) base, this * distilled base BTF will help update references to that (possibly changed) * base BTF. + * @param src_btf source split BTF object + * @param new_base_btf pointer to where the new base BTF object pointer will be stored + * @param new_split_btf pointer to where the new split BTF object pointer will be stored + * @return 0 on success; negative error code, otherwise * * Both the new split and its associated new base BTF must be freed by * the caller. @@ -264,6 +269,9 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); * to base BTF kinds, and verify those references are compatible with * *base_btf*; if they are, *btf* is adjusted such that is re-parented to * *base_btf* and type ids and strings are adjusted to accommodate this. + * @param btf split BTF object to relocate + * @param base_btf base BTF object + * @return 0 on success; negative error code, otherwise * * If successful, 0 is returned and **btf** now has **base_btf** as its * base. 
@@ -273,6 +281,48 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); */ LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf); +struct btf_permute_opts { + size_t sz; + /* optional .BTF.ext info along the main BTF info */ + struct btf_ext *btf_ext; + size_t :0; +}; +#define btf_permute_opts__last_field btf_ext + +/** + * @brief **btf__permute()** rearranges BTF types in-place according to a specified ID mapping + * @param btf BTF object to permute + * @param id_map Array mapping original type IDs to new IDs + * @param id_map_cnt Number of elements in @id_map + * @param opts Optional parameters, including BTF extension data for reference updates + * @return 0 on success, negative error code on failure + * + * **btf__permute()** reorders BTF types based on the provided @id_map array, + * updating all internal type references to maintain consistency. The function + * operates in-place, modifying the BTF object directly. + * + * For **base BTF**: + * - @id_map must include all types from ID 0 to `btf__type_cnt(btf) - 1` + * - @id_map_cnt must be `btf__type_cnt(btf)` + * - Mapping is defined as `id_map[original_id] = new_id` + * - `id_map[0]` must be 0 (void type cannot be moved) + * + * For **split BTF**: + * - @id_map must include only split types (types added on top of the base BTF) + * - @id_map_cnt must be `btf__type_cnt(btf) - btf__type_cnt(btf__base_btf(btf))` + * - Mapping is defined as `id_map[original_id - start_id] = new_id` + * - `start_id` equals `btf__type_cnt(btf__base_btf(btf))` + * + * After permutation, all type references within the BTF data and optional + * BTF extension (if provided via @opts) are updated automatically. 
+ * + * On error, returns a negative error code and sets errno: + * - `-EINVAL`: Invalid parameters or invalid ID mapping + * - `-ENOMEM`: Memory allocation failure + */ +LIBBPF_API int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt, + const struct btf_permute_opts *opts); + struct btf_dump; struct btf_dump_opts { diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 6388392f49a0..53c6624161d7 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1762,9 +1762,18 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d, __u16 left_shift_bits, right_shift_bits; const __u8 *bytes = data; __u8 nr_copy_bits; + __u8 start_bit, nr_bytes; __u64 num = 0; int i; + /* Calculate how many bytes cover the bitfield */ + start_bit = bits_offset % 8; + nr_bytes = (start_bit + bit_sz + 7) / 8; + + /* Bound check */ + if (data + nr_bytes > d->typed_dump->data_end) + return -E2BIG; + /* Maximum supported bitfield size is 64 bits */ if (t->size > 8) { pr_warn("unexpected bitfield size %d\n", t->size); diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index b842b83e2480..2fa434f09cce 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -506,6 +506,68 @@ static int probe_kern_arg_ctx_tag(int token_fd) return probe_fd(prog_fd); } +static int probe_ldimm64_full_range_off(int token_fd) +{ + char log_buf[1024]; + int prog_fd, map_fd; + int ret; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? 
BPF_F_TOKEN_FD : 0, + .log_buf = log_buf, + .log_size = sizeof(log_buf), + ); + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1UL << 30), + BPF_EXIT_INSN(), + }; + int insn_cnt = ARRAY_SIZE(insns); + + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr", sizeof(int), 1, 1, &map_opts); + if (map_fd < 0) { + ret = -errno; + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); + return ret; + } + insns[0].imm = map_fd; + + log_buf[0] = '\0'; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "global_reloc", "GPL", insns, insn_cnt, &prog_opts); + ret = -errno; + + close(map_fd); + + if (prog_fd >= 0) { + pr_warn("Error in %s(): Program loading unexpectedly succeeded.\n", __func__); + close(prog_fd); + return -EINVAL; + } + + /* + * Feature is allowed if we're not failing with the error message + * "direct value offset of %u is not allowed" removed in + * 12a1fe6e12db ("bpf/verifier: Do not limit maximum direct offset into arena map"). + * We should instead fail with "invalid access to map value pointer". + * Ensure we match with one of the two and we're not failing with a + * different, unexpected message. 
+ */ + if (strstr(log_buf, "direct value offset of")) + return 0; + + if (!strstr(log_buf, "invalid access to map value pointer")) { + pr_warn("Error in %s(): Program unexpectedly failed with message: %s.\n", + __func__, log_buf); + return ret; + } + + return 1; +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -581,6 +643,9 @@ static struct kern_feature_desc { [FEAT_BTF_QMARK_DATASEC] = { "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec, }, + [FEAT_LDIMM64_FULL_RANGE_OFF] = { + "full range LDIMM64 support", probe_ldimm64_full_range_off, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index dd3b2f57082d..0be7017800fe 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -115,6 +115,7 @@ static const char * const attach_type_name[] = { [BPF_TRACE_FENTRY] = "trace_fentry", [BPF_TRACE_FEXIT] = "trace_fexit", [BPF_MODIFY_RETURN] = "modify_return", + [BPF_TRACE_FSESSION] = "trace_fsession", [BPF_LSM_MAC] = "lsm_mac", [BPF_LSM_CGROUP] = "lsm_cgroup", [BPF_SK_LOOKUP] = "sk_lookup", @@ -190,6 +191,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", [BPF_MAP_TYPE_ARENA] = "arena", + [BPF_MAP_TYPE_INSN_ARRAY] = "insn_array", }; static const char * const prog_type_name[] = { @@ -369,6 +371,7 @@ enum reloc_type { RELO_EXTERN_CALL, RELO_SUBPROG_ADDR, RELO_CORE, + RELO_INSN_ARRAY, }; struct reloc_desc { @@ -378,8 +381,17 @@ struct reloc_desc { const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ struct { int map_idx; - int sym_off; - int ext_idx; + unsigned int sym_off; + /* + * The following two fields can be unionized, as the + * ext_idx field is used for extern symbols, and the + * sym_size is used for jump tables, which are never + * extern + */ + union { + int ext_idx; + 
int sym_size; + }; }; }; }; @@ -421,6 +433,11 @@ struct bpf_sec_def { libbpf_prog_attach_fn_t prog_attach_fn; }; +struct bpf_light_subprog { + __u32 sec_insn_off; + __u32 sub_insn_off; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -494,6 +511,9 @@ struct bpf_program { __u32 line_info_cnt; __u32 prog_flags; __u8 hash[SHA256_DIGEST_LENGTH]; + + struct bpf_light_subprog *subprogs; + __u32 subprog_cnt; }; struct bpf_struct_ops { @@ -667,6 +687,7 @@ struct elf_state { int symbols_shndx; bool has_st_ops; int arena_data_shndx; + int jumptables_data_shndx; }; struct usdt_manager; @@ -737,6 +758,17 @@ struct bpf_object { int arena_map_idx; void *arena_data; size_t arena_data_sz; + size_t arena_data_off; + + void *jumptables_data; + size_t jumptables_data_sz; + + struct { + struct bpf_program *prog; + unsigned int sym_off; + int fd; + } *jumptable_maps; + size_t jumptable_map_cnt; struct kern_feature_cache *feat_cache; char *token_path; @@ -764,6 +796,7 @@ void bpf_program__unload(struct bpf_program *prog) zfree(&prog->func_info); zfree(&prog->line_info); + zfree(&prog->subprogs); } static void bpf_program__exit(struct bpf_program *prog) @@ -2872,7 +2905,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, var_extra = btf_var(var); map_name = btf__name_by_offset(obj->btf, var->name_off); - if (map_name == NULL || map_name[0] == '\0') { + if (str_is_empty(map_name)) { pr_warn("map #%d: empty name.\n", var_idx); return -EINVAL; } @@ -2960,10 +2993,11 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, void *data, size_t data_sz) { const long page_sz = sysconf(_SC_PAGE_SIZE); + const size_t data_alloc_sz = roundup(data_sz, page_sz); size_t mmap_sz; mmap_sz = bpf_map_mmap_sz(map); - if (roundup(data_sz, page_sz) > mmap_sz) { + if (data_alloc_sz > mmap_sz) { pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", sec_name, mmap_sz, 
data_sz); return -E2BIG; @@ -2996,7 +3030,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); data = elf_sec_data(obj, scn); - if (!scn || !data) { + if (!data) { pr_warn("elf: failed to get %s map definitions for %s\n", MAPS_ELF_SEC, obj->path); return -EINVAL; @@ -3942,6 +3976,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, ARENA_SEC) == 0) { obj->efile.arena_data = data; obj->efile.arena_data_shndx = idx; + } else if (strcmp(name, JUMPTABLES_SEC) == 0) { + obj->jumptables_data = malloc(data->d_size); + if (!obj->jumptables_data) + return -ENOMEM; + memcpy(obj->jumptables_data, data->d_buf, data->d_size); + obj->jumptables_data_sz = data->d_size; + obj->efile.jumptables_data_shndx = idx; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -4238,7 +4279,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) if (!sym_is_extern(sym)) continue; ext_name = elf_sym_str(obj, sym->st_name); - if (!ext_name || !ext_name[0]) + if (str_is_empty(ext_name)) continue; ext = obj->externs; @@ -4634,6 +4675,16 @@ static int bpf_program__record_reloc(struct bpf_program *prog, return 0; } + /* jump table data relocation */ + if (shdr_idx == obj->efile.jumptables_data_shndx) { + reloc_desc->type = RELO_INSN_ARRAY; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = -1; + reloc_desc->sym_off = sym->st_value; + reloc_desc->sym_size = sym->st_size; + return 0; + } + /* generic map reference relocation */ if (type == LIBBPF_MAP_UNSPEC) { if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { @@ -5576,7 +5627,8 @@ retry: return err; } if (obj->arena_data) { - memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); + memcpy(map->mmaped + obj->arena_data_off, obj->arena_data, + obj->arena_data_sz); zfree(&obj->arena_data); } } @@ -6144,6 +6196,157 @@ static void poison_kfunc_call(struct bpf_program *prog, int 
relo_idx, insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; } +static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, unsigned int sym_off) +{ + size_t i; + + for (i = 0; i < obj->jumptable_map_cnt; i++) { + /* + * This might happen that same offset is used for two different + * programs (as jump tables can be the same). However, for + * different programs different maps should be created. + */ + if (obj->jumptable_maps[i].sym_off == sym_off && + obj->jumptable_maps[i].prog == prog) + return obj->jumptable_maps[i].fd; + } + + return -ENOENT; +} + +static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, unsigned int sym_off, int map_fd) +{ + size_t cnt = obj->jumptable_map_cnt; + size_t size = sizeof(obj->jumptable_maps[0]); + void *tmp; + + tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size); + if (!tmp) + return -ENOMEM; + + obj->jumptable_maps = tmp; + obj->jumptable_maps[cnt].prog = prog; + obj->jumptable_maps[cnt].sym_off = sym_off; + obj->jumptable_maps[cnt].fd = map_fd; + obj->jumptable_map_cnt++; + + return 0; +} + +static int find_subprog_idx(struct bpf_program *prog, int insn_idx) +{ + int i; + + for (i = prog->subprog_cnt - 1; i >= 0; i--) { + if (insn_idx >= prog->subprogs[i].sub_insn_off) + return i; + } + + return -1; +} + +static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo) +{ + const __u32 jt_entry_size = 8; + unsigned int sym_off = relo->sym_off; + int jt_size = relo->sym_size; + __u32 max_entries = jt_size / jt_entry_size; + __u32 value_size = sizeof(struct bpf_insn_array_value); + struct bpf_insn_array_value val = {}; + int subprog_idx; + int map_fd, err; + __u64 insn_off; + __u64 *jt; + __u32 i; + + map_fd = find_jt_map(obj, prog, sym_off); + if (map_fd >= 0) + return map_fd; + + if (sym_off % jt_entry_size) { + pr_warn("map '.jumptables': jumptable start %u should be multiple of %u\n", + sym_off, jt_entry_size); + return -EINVAL; + } + + if (jt_size % 
jt_entry_size) { + pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n", + jt_size, jt_entry_size); + return -EINVAL; + } + + map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables", + 4, value_size, max_entries, NULL); + if (map_fd < 0) + return map_fd; + + if (!obj->jumptables_data) { + pr_warn("map '.jumptables': ELF file is missing jump table data\n"); + err = -EINVAL; + goto err_close; + } + if (sym_off + jt_size > obj->jumptables_data_sz) { + pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n", + obj->jumptables_data_sz, sym_off + jt_size); + err = -EINVAL; + goto err_close; + } + + subprog_idx = -1; /* main program */ + if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) { + pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx); + err = -EINVAL; + goto err_close; + } + if (prog->subprogs) + subprog_idx = find_subprog_idx(prog, relo->insn_idx); + + jt = (__u64 *)(obj->jumptables_data + sym_off); + for (i = 0; i < max_entries; i++) { + /* + * The offset should be made to be relative to the beginning of + * the main function, not the subfunction. + */ + insn_off = jt[i]/sizeof(struct bpf_insn); + if (subprog_idx >= 0) { + insn_off -= prog->subprogs[subprog_idx].sec_insn_off; + insn_off += prog->subprogs[subprog_idx].sub_insn_off; + } else { + insn_off -= prog->sec_insn_off; + } + + /* + * LLVM-generated jump tables contain u64 records, however + * should contain values that fit in u32. 
+ */ + if (insn_off > UINT32_MAX) { + pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %u\n", + (long long)jt[i], sym_off + i * jt_entry_size); + err = -EINVAL; + goto err_close; + } + + val.orig_off = insn_off; + err = bpf_map_update_elem(map_fd, &i, &val, 0); + if (err) + goto err_close; + } + + err = bpf_map_freeze(map_fd); + if (err) + goto err_close; + + err = add_jt_map(obj, prog, sym_off, map_fd); + if (err) + goto err_close; + + return map_fd; + +err_close: + close(map_fd); + return err; +} + /* Relocate data references within program code: * - map references; * - global variable references; @@ -6177,6 +6380,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_DATA: map = &obj->maps[relo->map_idx]; insn[1].imm = insn[0].imm + relo->sym_off; + + if (relo->map_idx == obj->arena_map_idx) + insn[1].imm += obj->arena_data_off; + if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; @@ -6235,6 +6442,20 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_CORE: /* will be handled by bpf_program_record_relos() */ break; + case RELO_INSN_ARRAY: { + int map_fd; + + map_fd = create_jt_map(obj, prog, relo); + if (map_fd < 0) { + pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n", + prog->name, i, relo->sym_off); + return map_fd; + } + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn->imm = map_fd; + insn->off = 0; + } + break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", prog->name, i, relo->type); @@ -6432,36 +6653,62 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra return 0; } +static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog) +{ + size_t size = sizeof(main_prog->subprogs[0]); + int cnt = main_prog->subprog_cnt; + void *tmp; + + tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size); + if (!tmp) + return -ENOMEM; + + 
main_prog->subprogs = tmp; + main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off; + main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off; + main_prog->subprog_cnt++; + + return 0; +} + static int bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *subprog) { - struct bpf_insn *insns; - size_t new_cnt; - int err; + struct bpf_insn *insns; + size_t new_cnt; + int err; - subprog->sub_insn_off = main_prog->insns_cnt; + subprog->sub_insn_off = main_prog->insns_cnt; - new_cnt = main_prog->insns_cnt + subprog->insns_cnt; - insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); - if (!insns) { - pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); - return -ENOMEM; - } - main_prog->insns = insns; - main_prog->insns_cnt = new_cnt; + new_cnt = main_prog->insns_cnt + subprog->insns_cnt; + insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); + return -ENOMEM; + } + main_prog->insns = insns; + main_prog->insns_cnt = new_cnt; - memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, - subprog->insns_cnt * sizeof(*insns)); + memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, + subprog->insns_cnt * sizeof(*insns)); - pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", - main_prog->name, subprog->insns_cnt, subprog->name); + pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", + main_prog->name, subprog->insns_cnt, subprog->name); + + /* The subprog insns are now appended. Append its relos too. */ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; + + err = save_subprog_offsets(main_prog, subprog); + if (err) { + pr_warn("prog '%s': failed to add subprog offsets: %s\n", + main_prog->name, errstr(err)); + return err; + } - /* The subprog insns are now appended. Append its relos too. 
*/ - err = append_subprog_relos(main_prog, subprog); - if (err) - return err; - return 0; + return 0; } static int @@ -7138,6 +7385,14 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat bpf_object__sort_relos(obj); } + /* place globals at the end of the arena (if supported) */ + if (obj->arena_map_idx >= 0 && kernel_supports(obj, FEAT_LDIMM64_FULL_RANGE_OFF)) { + struct bpf_map *arena_map = &obj->maps[obj->arena_map_idx]; + + obj->arena_data_off = bpf_map_mmap_sz(arena_map) - + roundup(obj->arena_data_sz, sysconf(_SC_PAGE_SIZE)); + } + /* Before relocating calls pre-process relocations and mark * few ld_imm64 instructions that points to subprogs. * Otherwise bpf_object__reloc_code() later would have to consider @@ -8245,7 +8500,7 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type, struct bpf_object *obj = ctx; const struct btf_type *t; struct extern_desc *ext; - char *res; + const char *res; res = strstr(sym_name, ".llvm."); if (sym_type == 'd' && res) @@ -9228,6 +9483,13 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->arena_data); + zfree(&obj->jumptables_data); + obj->jumptables_data_sz = 0; + + for (i = 0; i < obj->jumptable_map_cnt; i++) + close(obj->jumptable_maps[i].fd); + zfree(&obj->jumptable_maps); + free(obj); } @@ -9607,6 +9869,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fsession+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fsession.s+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), SEC_DEF("lsm.s+", LSM, 
BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), @@ -10667,7 +10931,7 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) } static int validate_map_op(const struct bpf_map *map, size_t key_sz, - size_t value_sz, bool check_value_sz) + size_t value_sz, bool check_value_sz, __u64 flags) { if (!map_is_created(map)) /* map is not yet created */ return -ENOENT; @@ -10694,6 +10958,20 @@ static int validate_map_op(const struct bpf_map *map, size_t key_sz, int num_cpu = libbpf_num_possible_cpus(); size_t elem_sz = roundup(map->def.value_size, 8); + if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) { + if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS)) { + pr_warn("map '%s': BPF_F_CPU and BPF_F_ALL_CPUS are mutually exclusive\n", + map->name); + return -EINVAL; + } + if (map->def.value_size != value_sz) { + pr_warn("map '%s': unexpected value size %zu provided for either BPF_F_CPU or BPF_F_ALL_CPUS, expected %u\n", + map->name, value_sz, map->def.value_size); + return -EINVAL; + } + break; + } + if (value_sz != num_cpu * elem_sz) { pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); @@ -10718,7 +10996,7 @@ int bpf_map__lookup_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, value_sz, true); + err = validate_map_op(map, key_sz, value_sz, true, flags); if (err) return libbpf_err(err); @@ -10731,7 +11009,7 @@ int bpf_map__update_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, value_sz, true); + err = validate_map_op(map, key_sz, value_sz, true, flags); if (err) return libbpf_err(err); @@ -10743,7 +11021,7 @@ int bpf_map__delete_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, flags); if (err) return libbpf_err(err); @@ -10756,7 +11034,7 @@ int 
bpf_map__lookup_and_delete_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, value_sz, true); + err = validate_map_op(map, key_sz, value_sz, true, flags); if (err) return libbpf_err(err); @@ -10768,7 +11046,7 @@ int bpf_map__get_next_key(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, 0); if (err) return libbpf_err(err); @@ -11325,8 +11603,6 @@ static const char *arch_specific_syscall_pfx(void) return "ia32"; #elif defined(__s390x__) return "s390x"; -#elif defined(__s390__) - return "s390"; #elif defined(__arm__) return "arm"; #elif defined(__aarch64__) @@ -11574,7 +11850,8 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, * * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") */ - char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; + char sym_trim[256], *psym_trim = sym_trim; + const char *sym_sfx; if (!(sym_sfx = strstr(sym_name, ".llvm."))) return 0; @@ -12113,8 +12390,6 @@ static const char *arch_specific_lib_paths(void) return "/lib/i386-linux-gnu"; #elif defined(__s390x__) return "/lib/s390x-linux-gnu"; -#elif defined(__s390__) - return "/lib/s390-linux-gnu"; #elif defined(__arm__) && defined(__SOFTFP__) return "/lib/arm-linux-gnueabi"; #elif defined(__arm__) && !defined(__SOFTFP__) @@ -12159,7 +12434,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz) if (!search_paths[i]) continue; for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { - char *next_path; + const char *next_path; int seg_len; if (s[0] == ':') @@ -13858,8 +14133,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog, return libbpf_err(-EINVAL); if (attach_prog_fd && !attach_func_name) { - /* remember attach_prog_fd and let bpf_program__load() find - * BTF ID during the program load + /* Store attach_prog_fd. 
The BTF ID will be resolved later during + * the normal object/program load phase. */ prog->attach_prog_fd = attach_prog_fd; return 0; @@ -13891,6 +14166,37 @@ int bpf_program__set_attach_target(struct bpf_program *prog, return 0; } +int bpf_program__assoc_struct_ops(struct bpf_program *prog, struct bpf_map *map, + struct bpf_prog_assoc_struct_ops_opts *opts) +{ + int prog_fd, map_fd; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't associate BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err(-EINVAL); + } + + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) { + pr_warn("prog '%s': can't associate struct_ops program\n", prog->name); + return libbpf_err(-EINVAL); + } + + map_fd = bpf_map__fd(map); + if (map_fd < 0) { + pr_warn("map '%s': can't associate BPF map without FD (was it created?)\n", map->name); + return libbpf_err(-EINVAL); + } + + if (!bpf_map__is_struct_ops(map)) { + pr_warn("map '%s': can't associate non-struct_ops map\n", map->name); + return libbpf_err(-EINVAL); + } + + return bpf_prog_assoc_struct_ops(prog_fd, map_fd, opts); +} + int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) { int err = 0, n, len, start, end = -1; @@ -14156,7 +14462,10 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) if (!map_skel->mmaped) continue; - *map_skel->mmaped = map->mmaped; + if (map->def.type == BPF_MAP_TYPE_ARENA) + *map_skel->mmaped = map->mmaped + map->obj->arena_data_off; + else + *map_skel->mmaped = map->mmaped; } return 0; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5118d0a90e24..dfc37a615578 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -448,7 +448,7 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); /** * @brief **bpf_program__unpin()** unpins the BPF program from a file - * in the BPFFS specified by a path. This decrements the programs + * in the BPFFS specified by a path. 
This decrements program's in-kernel * reference count. * * The file pinning the BPF program can also be unlinked by a different @@ -481,14 +481,12 @@ LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); /** * @brief **bpf_link__unpin()** unpins the BPF link from a file - * in the BPFFS specified by a path. This decrements the links - * reference count. + * in the BPFFS. This decrements link's in-kernel reference count. * * The file pinning the BPF link can also be unlinked by a different * process in which case this function will return an error. * - * @param prog BPF program to unpin - * @param path file path to the pin in a BPF file system + * @param link BPF link to unpin * @return 0, on success; negative error code, otherwise */ LIBBPF_API int bpf_link__unpin(struct bpf_link *link); @@ -995,14 +993,35 @@ LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog); * - fentry/fexit/fmod_ret; * - lsm; * - freplace. - * @param prog BPF program to set the attach type for - * @param type attach type to set the BPF map to have + * @param prog BPF program to configure; must be not yet loaded. + * @param attach_prog_fd FD of target BPF program (for freplace/extension). + * If >0 and func name omitted, defers BTF ID resolution. + * @param attach_func_name Target function name. Used either with + * attach_prog_fd to find destination BTF type ID in that BPF program, or + * alone (no attach_prog_fd) to resolve kernel (vmlinux/module) BTF ID. + * Must be provided if attach_prog_fd is 0. * @return error code; or 0 if no error occurred. */ LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); +struct bpf_prog_assoc_struct_ops_opts; /* defined in bpf.h */ + +/** + * @brief **bpf_program__assoc_struct_ops()** associates a BPF program with a + * struct_ops map. 
+ * + * @param prog BPF program + * @param map struct_ops map to be associated with the BPF program + * @param opts optional options, can be NULL + * + * @return 0, on success; negative error code, otherwise + */ +LIBBPF_API int +bpf_program__assoc_struct_ops(struct bpf_program *prog, struct bpf_map *map, + struct bpf_prog_assoc_struct_ops_opts *opts); + /** * @brief **bpf_object__find_map_by_name()** returns BPF map of * the given name, if it exists within the passed BPF object @@ -1098,6 +1117,7 @@ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); /** * @brief **bpf_map__set_value_size()** sets map value size. * @param map the BPF map instance + * @param size the new value size * @return 0, on success; negative error, otherwise * * There is a special case for maps with associated memory-mapped regions, like @@ -1196,13 +1216,14 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** * @param value pointer to memory in which looked up value will be stored * @param value_sz size in byte of value data memory; it has to match BPF map - * definition's **value_size**. For per-CPU BPF maps value size has to be - * a product of BPF map value size and number of possible CPUs in the system - * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for - * per-CPU values value size has to be aligned up to closest 8 bytes for - * alignment reasons, so expected size is: `round_up(value_size, 8) - * * libbpf_num_possible_cpus()`. - * @flags extra flags passed to kernel for this operation + * definition's **value_size**. For per-CPU BPF maps, value size can be + * `value_size` if either **BPF_F_CPU** or **BPF_F_ALL_CPUS** is specified + * in **flags**, otherwise a product of BPF map value size and number of + * possible CPUs in the system (could be fetched with + * **libbpf_num_possible_cpus()**). 
Note also that for per-CPU values value + * size has to be aligned up to closest 8 bytes, so expected size is: + * `round_up(value_size, 8) * libbpf_num_possible_cpus()`. + * @param flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * * **bpf_map__lookup_elem()** is high-level equivalent of @@ -1219,14 +1240,8 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map, * @param key pointer to memory containing bytes of the key * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** * @param value pointer to memory containing bytes of the value - * @param value_sz size in byte of value data memory; it has to match BPF map - * definition's **value_size**. For per-CPU BPF maps value size has to be - * a product of BPF map value size and number of possible CPUs in the system - * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for - * per-CPU values value size has to be aligned up to closest 8 bytes for - * alignment reasons, so expected size is: `round_up(value_size, 8) - * * libbpf_num_possible_cpus()`. - * @flags extra flags passed to kernel for this operation + * @param value_sz refer to **bpf_map__lookup_elem**'s description.' 
+ * @param flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * * **bpf_map__update_elem()** is high-level equivalent of @@ -1242,7 +1257,7 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map, * @param map BPF map to delete element from * @param key pointer to memory containing bytes of the key * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** - * @flags extra flags passed to kernel for this operation + * @param flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * * **bpf_map__delete_elem()** is high-level equivalent of @@ -1265,7 +1280,7 @@ LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map, * per-CPU values value size has to be aligned up to closest 8 bytes for * alignment reasons, so expected size is: `round_up(value_size, 8) * * libbpf_num_possible_cpus()`. - * @flags extra flags passed to kernel for this operation + * @param flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of @@ -1637,6 +1652,7 @@ struct perf_buffer_opts { * @param sample_cb function called on each received data record * @param lost_cb function called when record loss has occurred * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb* + * @param opts optional parameters for the perf buffer, can be null * @return a new instance of struct perf_buffer on success, NULL on error with * *errno* containing an error code */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 8ed8749907d4..d18fbcea7578 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -451,4 +451,7 @@ LIBBPF_1.7.0 { global: bpf_map__set_exclusive_program; bpf_map__exclusive_program; + bpf_prog_assoc_struct_ops; + bpf_program__assoc_struct_ops; + btf__permute; } LIBBPF_1.6.0; diff 
--git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 35b2527bedec..974147e8a8aa 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -74,6 +74,8 @@ #define ELF64_ST_VISIBILITY(o) ((o) & 0x03) #endif +#define JUMPTABLES_SEC ".jumptables" + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) @@ -390,6 +392,8 @@ enum kern_feature_id { FEAT_ARG_CTX_TAG, /* Kernel supports '?' at the front of datasec names */ FEAT_BTF_QMARK_DATASEC, + /* Kernel supports LDIMM64 imm offsets past 512 MiB. */ + FEAT_LDIMM64_FULL_RANGE_OFF, __FEAT_CNT, }; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9dfbe7750f56..bccf4bb747e1 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -364,6 +364,10 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: break; + case BPF_MAP_TYPE_INSN_ARRAY: + key_size = sizeof(__u32); + value_size = sizeof(struct bpf_insn_array_value); + break; case BPF_MAP_TYPE_UNSPEC: default: return -EOPNOTSUPP; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 56ae77047bc3..78f92c39290a 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -581,7 +581,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, written = 0; while (written < buf_sz) { - ret = write(fd, buf, buf_sz); + ret = write(fd, buf + written, buf_sz - written); if (ret < 0) { ret = -errno; pr_warn("failed to write '%s': %s\n", filename, errstr(ret)); @@ -2025,6 +2025,9 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx; return 0; } + + if (strcmp(src_sec->sec_name, JUMPTABLES_SEC) == 0) + goto add_sym; } if (sym_bind == STB_LOCAL) diff --git 
a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index c997e69d507f..c9a78fb16f11 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -143,7 +143,7 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, struct nlmsghdr *nh; int len, ret; - ret = alloc_iov(&iov, 4096); + ret = alloc_iov(&iov, 8192); if (ret) goto done; @@ -212,6 +212,8 @@ start: } } } + if (len) + pr_warn("Invalid message or trailing data in Netlink response: %d bytes left\n", len); } ret = 0; done: diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index c174b4086673..d1524f6f54ae 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1376,8 +1376,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec #elif defined(__s390x__) -/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */ - static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { unsigned int reg; diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 4072bc9b7670..576ecc5fc312 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -211,6 +211,8 @@ SYNOPSIS struct perf_record_header_feature; struct perf_record_compressed; struct perf_record_compressed2; + struct perf_record_schedstat_cpu; + struct perf_record_schedstat_domain; -- DESCRIPTION diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 7fbb50b74c00..32301a1d8f0c 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,7 +42,6 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ --I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -51,9 +50,9 @@ INCLUDES = \ -I$(srctree)/tools/include/uapi # Append required CFLAGS +override CFLAGS := $(INCLUDES) $(CFLAGS) override 
CFLAGS += -g -Werror -Wall override CFLAGS += -fPIC -override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden override CFLAGS += $(EXTRA_WARNINGS) override CFLAGS += $(EXTRA_CFLAGS) @@ -100,16 +99,7 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm -ifeq ($(SRCARCH),arm64) - syscall-y := $(uapi-asm)/unistd_64.h -endif -uapi-asm-generic: - $(if $(syscall-y),\ - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ - generic=include/uapi/asm-generic $(syscall-y),) - -$(LIBPERF_IN): uapi-asm-generic FORCE +$(LIBPERF_IN): FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) @@ -130,7 +120,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) + $(TESTS_STATIC) $(TESTS_SHARED) TESTS_IN = tests-in.o @@ -179,6 +169,7 @@ install_lib: libs cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ) HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h +HDRS += schedstat-v15.h schedstat-v16.h schedstat-v17.h INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index b20a5280f2b3..4160e7d2e120 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -368,10 +368,12 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map) .cpu = -1 }; - // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return __perf_cpu_map__nr(map) > 0 - ? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1) - : result; + if (!map) + return result; + + // The CPUs are always sorted and nr is always > 0 as 0 length map is + // encoded as NULL. 
+ return __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1); } /** Is 'b' a subset of 'a'. */ @@ -453,21 +455,33 @@ int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other) struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, struct perf_cpu_map *other) { - struct perf_cpu *tmp_cpus; - int tmp_len; int i, j, k; - struct perf_cpu_map *merged = NULL; + struct perf_cpu_map *merged; if (perf_cpu_map__is_subset(other, orig)) return perf_cpu_map__get(orig); if (perf_cpu_map__is_subset(orig, other)) return perf_cpu_map__get(other); - tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other)); - tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); - if (!tmp_cpus) + i = j = k = 0; + while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) + i++; + else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu) + j++; + else { /* CPUs match. */ + i++; + j++; + k++; + } + } + if (k == 0) /* Maps are completely disjoint. */ return NULL; + merged = perf_cpu_map__alloc(k); + if (!merged) + return NULL; + /* Entries are added to merged in sorted order, so no need to sort again. 
*/ i = j = k = 0; while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) @@ -476,11 +490,8 @@ struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, j++; else { j++; - tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++); + RC_CHK_ACCESS(merged)->map[k++] = __perf_cpu_map__cpu(orig, i++); } } - if (k) - merged = cpu_map__trim_new(k, tmp_cpus); - free(tmp_cpus); return merged; } diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 3ed023f4b190..1f210dadd666 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -101,6 +101,28 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } + /* + * Tool events may only read on the first CPU index to avoid double + * counting things like duration_time. Make the evsel->cpus contain just + * that single entry otherwise we may spend time changing affinity to + * CPUs that just have tool events, etc. + */ + if (evsel->reads_only_on_cpu_idx0 && perf_cpu_map__nr(evsel->cpus) > 0) { + struct perf_cpu_map *srcs[3] = { + evlist->all_cpus, + evlist->user_requested_cpus, + evsel->pmu_cpus, + }; + for (size_t i = 0; i < ARRAY_SIZE(srcs); i++) { + if (!srcs[i]) + continue; + + perf_cpu_map__put(evsel->cpus); + evsel->cpus = perf_cpu_map__new_int(perf_cpu_map__cpu(srcs[i], 0).cpu); + break; + } + } + /* Sanity check assert before the evsel is potentially removed. */ assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus)); @@ -133,16 +155,22 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { - struct perf_evsel *evsel, *n; - evlist->needs_map_propagation = true; /* Clear the all_cpus set which will be merged into during propagation. 
*/ perf_cpu_map__put(evlist->all_cpus); evlist->all_cpus = NULL; - list_for_each_entry_safe(evsel, n, &evlist->entries, node) - __perf_evlist__propagate_maps(evlist, evsel); + /* 2 rounds so that reads_only_on_cpu_idx0 benefit from knowing the other CPU maps. */ + for (int round = 0; round < 2; round++) { + struct perf_evsel *evsel, *n; + + list_for_each_entry_safe(evsel, n, &evlist->entries, node) { + if ((!evsel->reads_only_on_cpu_idx0 && round == 0) || + (evsel->reads_only_on_cpu_idx0 && round == 1)) + __perf_evlist__propagate_maps(evlist, evsel); + } + } } void perf_evlist__add(struct perf_evlist *evlist, diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index fefe64ba5e26..b988034f1371 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -128,6 +128,8 @@ struct perf_evsel { bool requires_cpu; /** Is the PMU for the event a core one? Effects the handling of own_cpus. */ bool is_pmu_core; + /** Does the evsel only read on the first CPU index, such as tool time events? */ + bool reads_only_on_cpu_idx0; int idx; }; diff --git a/tools/lib/perf/include/perf/core.h b/tools/lib/perf/include/perf/core.h index a3f6d68edad7..06cc132d88cf 100644 --- a/tools/lib/perf/include/perf/core.h +++ b/tools/lib/perf/include/perf/core.h @@ -5,7 +5,7 @@ #include <stdarg.h> #ifndef LIBPERF_API -#define LIBPERF_API __attribute__((visibility("default"))) +#define LIBPERF_API extern __attribute__((visibility("default"))) #endif enum libperf_print_level { diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index aa1e91c97a22..9043dc72b5d6 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -151,6 +151,18 @@ struct perf_record_switch { __u32 next_prev_tid; }; +struct perf_record_callchain_deferred { + struct perf_event_header header; + /* + * This is to match kernel and (deferred) user stacks together. 
+ * The kernel part will be in the sample callchain array after + * the PERF_CONTEXT_USER_DEFERRED entry. + */ + __u64 cookie; + __u64 nr; + __u64 ips[]; +}; + struct perf_record_header_attr { struct perf_event_header header; struct perf_event_attr attr; @@ -484,6 +496,71 @@ struct perf_record_bpf_metadata { struct perf_record_bpf_metadata_entry entries[]; }; +struct perf_record_schedstat_cpu_v15 { +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) _type _name +#include "schedstat-v15.h" +#undef CPU_FIELD +}; + +struct perf_record_schedstat_cpu_v16 { +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) _type _name +#include "schedstat-v16.h" +#undef CPU_FIELD +}; + +struct perf_record_schedstat_cpu_v17 { +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) _type _name +#include "schedstat-v17.h" +#undef CPU_FIELD +}; + +struct perf_record_schedstat_cpu { + struct perf_event_header header; + __u64 timestamp; + __u32 cpu; + __u16 version; + /* Padding */ + char __pad[2]; + union { + struct perf_record_schedstat_cpu_v15 v15; + struct perf_record_schedstat_cpu_v16 v16; + struct perf_record_schedstat_cpu_v17 v17; + }; +}; + +struct perf_record_schedstat_domain_v15 { +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) _type _name +#include "schedstat-v15.h" +#undef DOMAIN_FIELD +}; + +struct perf_record_schedstat_domain_v16 { +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) _type _name +#include "schedstat-v16.h" +#undef DOMAIN_FIELD +}; + +struct perf_record_schedstat_domain_v17 { +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) _type _name +#include "schedstat-v17.h" +#undef DOMAIN_FIELD +}; + +#define DOMAIN_NAME_LEN 16 + +struct perf_record_schedstat_domain { + struct perf_event_header header; + __u64 timestamp; + __u32 cpu; + __u16 version; + __u16 domain; + union { + struct perf_record_schedstat_domain_v15 v15; + struct 
perf_record_schedstat_domain_v16 v16; + struct perf_record_schedstat_domain_v17 v17; + }; +}; + enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_USER_TYPE_START = 64, PERF_RECORD_HEADER_ATTR = 64, @@ -507,6 +584,8 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_FINISHED_INIT = 82, PERF_RECORD_COMPRESSED2 = 83, PERF_RECORD_BPF_METADATA = 84, + PERF_RECORD_SCHEDSTAT_CPU = 85, + PERF_RECORD_SCHEDSTAT_DOMAIN = 86, PERF_RECORD_HEADER_MAX }; @@ -523,6 +602,7 @@ union perf_event { struct perf_record_read read; struct perf_record_throttle throttle; struct perf_record_sample sample; + struct perf_record_callchain_deferred callchain_deferred; struct perf_record_bpf_event bpf; struct perf_record_ksymbol ksymbol; struct perf_record_text_poke_event text_poke; @@ -549,6 +629,8 @@ union perf_event { struct perf_record_compressed pack; struct perf_record_compressed2 pack2; struct perf_record_bpf_metadata bpf_metadata; + struct perf_record_schedstat_cpu schedstat_cpu; + struct perf_record_schedstat_domain schedstat_domain; }; #endif /* __LIBPERF_EVENT_H */ diff --git a/tools/lib/perf/include/perf/schedstat-v15.h b/tools/lib/perf/include/perf/schedstat-v15.h new file mode 100644 index 000000000000..639458df05f8 --- /dev/null +++ b/tools/lib/perf/include/perf/schedstat-v15.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifdef CPU_FIELD +CPU_FIELD(__u32, yld_count, "sched_yield() count", + "%11u", false, yld_count, v15); +CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored", + "%11u", false, array_exp, v15); +CPU_FIELD(__u32, sched_count, "schedule() called", + "%11u", false, sched_count, v15); +CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle", + "%11u", true, sched_count, v15); +CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called", + "%11u", false, ttwu_count, v15); +CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu", + "%11u", true, ttwu_count, 
v15); +CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)", + "%11llu", false, rq_cpu_time, v15); +CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)", + "%11llu", true, rq_cpu_time, v15); +CPU_FIELD(__u64, pcount, "total timeslices run on this cpu", + "%11llu", false, pcount, v15); +#endif + +#ifdef DOMAIN_FIELD +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category idle> "); +#endif +DOMAIN_FIELD(__u32, idle_lb_count, + "load_balance() count on cpu idle", "%11u", true, v15); +DOMAIN_FIELD(__u32, idle_lb_balanced, + "load_balance() found balanced on cpu idle", "%11u", true, v15); +DOMAIN_FIELD(__u32, idle_lb_failed, + "load_balance() move task failed on cpu idle", "%11u", true, v15); +DOMAIN_FIELD(__u32, idle_lb_imbalance, + "imbalance sum on cpu idle", "%11u", false, v15); +DOMAIN_FIELD(__u32, idle_lb_gained, + "pull_task() count on cpu idle", "%11u", false, v15); +DOMAIN_FIELD(__u32, idle_lb_hot_gained, + "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v15); +DOMAIN_FIELD(__u32, idle_lb_nobusyq, + "load_balance() failed to find busier queue on cpu idle", "%11u", true, v15); +DOMAIN_FIELD(__u32, idle_lb_nobusyg, + "load_balance() failed to find busier group on cpu idle", "%11u", true, v15); +#ifdef DERIVED_CNT_FIELD +DERIVED_CNT_FIELD(idle_lb_success_count, "load_balance() success count on cpu idle", "%11u", + idle_lb_count, idle_lb_balanced, idle_lb_failed, v15); +#endif +#ifdef DERIVED_AVG_FIELD +DERIVED_AVG_FIELD(idle_lb_avg_pulled, + "avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf", + idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v15); +#endif +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category busy> "); +#endif +DOMAIN_FIELD(__u32, busy_lb_count, + "load_balance() count on cpu busy", "%11u", true, v15); +DOMAIN_FIELD(__u32, busy_lb_balanced, + "load_balance() found balanced on cpu busy", "%11u", true, v15); +DOMAIN_FIELD(__u32, 
busy_lb_failed, + "load_balance() move task failed on cpu busy", "%11u", true, v15); +DOMAIN_FIELD(__u32, busy_lb_imbalance, + "imbalance sum on cpu busy", "%11u", false, v15); +DOMAIN_FIELD(__u32, busy_lb_gained, + "pull_task() count on cpu busy", "%11u", false, v15); +DOMAIN_FIELD(__u32, busy_lb_hot_gained, + "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v15); +DOMAIN_FIELD(__u32, busy_lb_nobusyq, + "load_balance() failed to find busier queue on cpu busy", "%11u", true, v15); +DOMAIN_FIELD(__u32, busy_lb_nobusyg, + "load_balance() failed to find busier group on cpu busy", "%11u", true, v15); +#ifdef DERIVED_CNT_FIELD +DERIVED_CNT_FIELD(busy_lb_success_count, "load_balance() success count on cpu busy", "%11u", + busy_lb_count, busy_lb_balanced, busy_lb_failed, v15); +#endif +#ifdef DERIVED_AVG_FIELD +DERIVED_AVG_FIELD(busy_lb_avg_pulled, + "avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf", + busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v15); +#endif +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category newidle> "); +#endif +DOMAIN_FIELD(__u32, newidle_lb_count, + "load_balance() count on cpu newly idle", "%11u", true, v15); +DOMAIN_FIELD(__u32, newidle_lb_balanced, + "load_balance() found balanced on cpu newly idle", "%11u", true, v15); +DOMAIN_FIELD(__u32, newidle_lb_failed, + "load_balance() move task failed on cpu newly idle", "%11u", true, v15); +DOMAIN_FIELD(__u32, newidle_lb_imbalance, + "imbalance sum on cpu newly idle", "%11u", false, v15); +DOMAIN_FIELD(__u32, newidle_lb_gained, + "pull_task() count on cpu newly idle", "%11u", false, v15); +DOMAIN_FIELD(__u32, newidle_lb_hot_gained, + "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v15); +DOMAIN_FIELD(__u32, newidle_lb_nobusyq, + "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v15); +DOMAIN_FIELD(__u32, newidle_lb_nobusyg, + "load_balance() failed to find busier group on cpu newly 
idle", "%11u", true, v15); +#ifdef DERIVED_CNT_FIELD +DERIVED_CNT_FIELD(newidle_lb_success_count, + "load_balance() success count on cpu newly idle", "%11u", + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v15); +#endif +#ifdef DERIVED_AVG_FIELD +DERIVED_AVG_FIELD(newidle_lb_avg_pulled, + "avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf", + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v15); +#endif +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category active_load_balance()> "); +#endif +DOMAIN_FIELD(__u32, alb_count, + "active_load_balance() count", "%11u", false, v15); +DOMAIN_FIELD(__u32, alb_failed, + "active_load_balance() move task failed", "%11u", false, v15); +DOMAIN_FIELD(__u32, alb_pushed, + "active_load_balance() successfully moved a task", "%11u", false, v15); +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category sched_balance_exec()> "); +#endif +DOMAIN_FIELD(__u32, sbe_count, + "sbe_count is not used", "%11u", false, v15); +DOMAIN_FIELD(__u32, sbe_balanced, + "sbe_balanced is not used", "%11u", false, v15); +DOMAIN_FIELD(__u32, sbe_pushed, + "sbe_pushed is not used", "%11u", false, v15); +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category sched_balance_fork()> "); +#endif +DOMAIN_FIELD(__u32, sbf_count, + "sbf_count is not used", "%11u", false, v15); +DOMAIN_FIELD(__u32, sbf_balanced, + "sbf_balanced is not used", "%11u", false, v15); +DOMAIN_FIELD(__u32, sbf_pushed, + "sbf_pushed is not used", "%11u", false, v15); +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Wakeup Info> "); +#endif +DOMAIN_FIELD(__u32, ttwu_wake_remote, + "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v15); +DOMAIN_FIELD(__u32, ttwu_move_affine, + "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v15); +DOMAIN_FIELD(__u32, ttwu_move_balance, + "try_to_wake_up() started passive balancing", "%11u", false, v15); +#endif /* DOMAIN_FIELD */ diff --git 
a/tools/lib/perf/include/perf/schedstat-v16.h b/tools/lib/perf/include/perf/schedstat-v16.h new file mode 100644 index 000000000000..3462b79c29af --- /dev/null +++ b/tools/lib/perf/include/perf/schedstat-v16.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifdef CPU_FIELD +CPU_FIELD(__u32, yld_count, "sched_yield() count", + "%11u", false, yld_count, v16); +CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored", + "%11u", false, array_exp, v16); +CPU_FIELD(__u32, sched_count, "schedule() called", + "%11u", false, sched_count, v16); +CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle", + "%11u", true, sched_count, v16); +CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called", + "%11u", false, ttwu_count, v16); +CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu", + "%11u", true, ttwu_count, v16); +CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)", + "%11llu", false, rq_cpu_time, v16); +CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)", + "%11llu", true, rq_cpu_time, v16); +CPU_FIELD(__u64, pcount, "total timeslices run on this cpu", + "%11llu", false, pcount, v16); +#endif /* CPU_FIELD */ + +#ifdef DOMAIN_FIELD +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category busy> "); +#endif +DOMAIN_FIELD(__u32, busy_lb_count, + "load_balance() count on cpu busy", "%11u", true, v16); +DOMAIN_FIELD(__u32, busy_lb_balanced, + "load_balance() found balanced on cpu busy", "%11u", true, v16); +DOMAIN_FIELD(__u32, busy_lb_failed, + "load_balance() move task failed on cpu busy", "%11u", true, v16); +DOMAIN_FIELD(__u32, busy_lb_imbalance, + "imbalance sum on cpu busy", "%11u", false, v16); +DOMAIN_FIELD(__u32, busy_lb_gained, + "pull_task() count on cpu busy", "%11u", false, v16); +DOMAIN_FIELD(__u32, busy_lb_hot_gained, + "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v16); +DOMAIN_FIELD(__u32, busy_lb_nobusyq, + 
"load_balance() failed to find busier queue on cpu busy", "%11u", true, v16); +DOMAIN_FIELD(__u32, busy_lb_nobusyg, + "load_balance() failed to find busier group on cpu busy", "%11u", true, v16); +#ifdef DERIVED_CNT_FIELD +DERIVED_CNT_FIELD(busy_lb_success_count, "load_balance() success count on cpu busy", "%11u", + busy_lb_count, busy_lb_balanced, busy_lb_failed, v16); +#endif +#ifdef DERIVED_AVG_FIELD +DERIVED_AVG_FIELD(busy_lb_avg_pulled, + "avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf", + busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v16); +#endif +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category idle> "); +#endif +DOMAIN_FIELD(__u32, idle_lb_count, + "load_balance() count on cpu idle", "%11u", true, v16); +DOMAIN_FIELD(__u32, idle_lb_balanced, + "load_balance() found balanced on cpu idle", "%11u", true, v16); +DOMAIN_FIELD(__u32, idle_lb_failed, + "load_balance() move task failed on cpu idle", "%11u", true, v16); +DOMAIN_FIELD(__u32, idle_lb_imbalance, + "imbalance sum on cpu idle", "%11u", false, v16); +DOMAIN_FIELD(__u32, idle_lb_gained, + "pull_task() count on cpu idle", "%11u", false, v16); +DOMAIN_FIELD(__u32, idle_lb_hot_gained, + "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v16); +DOMAIN_FIELD(__u32, idle_lb_nobusyq, + "load_balance() failed to find busier queue on cpu idle", "%11u", true, v16); +DOMAIN_FIELD(__u32, idle_lb_nobusyg, + "load_balance() failed to find busier group on cpu idle", "%11u", true, v16); +#ifdef DERIVED_CNT_FIELD +DERIVED_CNT_FIELD(idle_lb_success_count, "load_balance() success count on cpu idle", "%11u", + idle_lb_count, idle_lb_balanced, idle_lb_failed, v16); +#endif +#ifdef DERIVED_AVG_FIELD +DERIVED_AVG_FIELD(idle_lb_avg_pulled, + "avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf", + idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v16); +#endif +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category newidle> "); +#endif 
+DOMAIN_FIELD(__u32, newidle_lb_count, + "load_balance() count on cpu newly idle", "%11u", true, v16); +DOMAIN_FIELD(__u32, newidle_lb_balanced, + "load_balance() found balanced on cpu newly idle", "%11u", true, v16); +DOMAIN_FIELD(__u32, newidle_lb_failed, + "load_balance() move task failed on cpu newly idle", "%11u", true, v16); +DOMAIN_FIELD(__u32, newidle_lb_imbalance, + "imbalance sum on cpu newly idle", "%11u", false, v16); +DOMAIN_FIELD(__u32, newidle_lb_gained, + "pull_task() count on cpu newly idle", "%11u", false, v16); +DOMAIN_FIELD(__u32, newidle_lb_hot_gained, + "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v16); +DOMAIN_FIELD(__u32, newidle_lb_nobusyq, + "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v16); +DOMAIN_FIELD(__u32, newidle_lb_nobusyg, + "load_balance() failed to find busier group on cpu newly idle", "%11u", true, v16); +#ifdef DERIVED_CNT_FIELD +DERIVED_CNT_FIELD(newidle_lb_success_count, + "load_balance() success count on cpu newly idle", "%11u", + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v16); +#endif +#ifdef DERIVED_AVG_FIELD +DERIVED_AVG_FIELD(newidle_lb_avg_count, + "avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf", + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v16); +#endif +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category active_load_balance()> "); +#endif +DOMAIN_FIELD(__u32, alb_count, + "active_load_balance() count", "%11u", false, v16); +DOMAIN_FIELD(__u32, alb_failed, + "active_load_balance() move task failed", "%11u", false, v16); +DOMAIN_FIELD(__u32, alb_pushed, + "active_load_balance() successfully moved a task", "%11u", false, v16); +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category sched_balance_exec()> "); +#endif +DOMAIN_FIELD(__u32, sbe_count, + "sbe_count is not used", "%11u", false, v16); +DOMAIN_FIELD(__u32, sbe_balanced, + "sbe_balanced is not used", "%11u", false, v16); 
+DOMAIN_FIELD(__u32, sbe_pushed, + "sbe_pushed is not used", "%11u", false, v16); +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category sched_balance_fork()> "); +#endif +DOMAIN_FIELD(__u32, sbf_count, + "sbf_count is not used", "%11u", false, v16); +DOMAIN_FIELD(__u32, sbf_balanced, + "sbf_balanced is not used", "%11u", false, v16); +DOMAIN_FIELD(__u32, sbf_pushed, + "sbf_pushed is not used", "%11u", false, v16); +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Wakeup Info> "); +#endif +DOMAIN_FIELD(__u32, ttwu_wake_remote, + "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v16); +DOMAIN_FIELD(__u32, ttwu_move_affine, + "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v16); +DOMAIN_FIELD(__u32, ttwu_move_balance, + "try_to_wake_up() started passive balancing", "%11u", false, v16); +#endif /* DOMAIN_FIELD */ diff --git a/tools/lib/perf/include/perf/schedstat-v17.h b/tools/lib/perf/include/perf/schedstat-v17.h new file mode 100644 index 000000000000..865dc7c1039c --- /dev/null +++ b/tools/lib/perf/include/perf/schedstat-v17.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifdef CPU_FIELD +CPU_FIELD(__u32, yld_count, "sched_yield() count", + "%11u", false, yld_count, v17); +CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored", + "%11u", false, array_exp, v17); +CPU_FIELD(__u32, sched_count, "schedule() called", + "%11u", false, sched_count, v17); +CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle", + "%11u", true, sched_count, v17); +CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called", + "%11u", false, ttwu_count, v17); +CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu", + "%11u", true, ttwu_count, v17); +CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)", + "%11llu", false, rq_cpu_time, v17); +CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)", + "%11llu", true, 
rq_cpu_time, v17); +CPU_FIELD(__u64, pcount, "total timeslices run on this cpu", + "%11llu", false, pcount, v17); +#endif /* CPU_FIELD */ + +#ifdef DOMAIN_FIELD +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category busy> "); +#endif +DOMAIN_FIELD(__u32, busy_lb_count, + "load_balance() count on cpu busy", "%11u", true, v17); +DOMAIN_FIELD(__u32, busy_lb_balanced, + "load_balance() found balanced on cpu busy", "%11u", true, v17); +DOMAIN_FIELD(__u32, busy_lb_failed, + "load_balance() move task failed on cpu busy", "%11u", true, v17); +DOMAIN_FIELD(__u32, busy_lb_imbalance_load, + "imbalance in load on cpu busy", "%11u", false, v17); +DOMAIN_FIELD(__u32, busy_lb_imbalance_util, + "imbalance in utilization on cpu busy", "%11u", false, v17); +DOMAIN_FIELD(__u32, busy_lb_imbalance_task, + "imbalance in number of tasks on cpu busy", "%11u", false, v17); +DOMAIN_FIELD(__u32, busy_lb_imbalance_misfit, + "imbalance in misfit tasks on cpu busy", "%11u", false, v17); +DOMAIN_FIELD(__u32, busy_lb_gained, + "pull_task() count on cpu busy", "%11u", false, v17); +DOMAIN_FIELD(__u32, busy_lb_hot_gained, + "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v17); +DOMAIN_FIELD(__u32, busy_lb_nobusyq, + "load_balance() failed to find busier queue on cpu busy", "%11u", true, v17); +DOMAIN_FIELD(__u32, busy_lb_nobusyg, + "load_balance() failed to find busier group on cpu busy", "%11u", true, v17); +#ifdef DERIVED_CNT_FIELD +DERIVED_CNT_FIELD(busy_lb_success_count, "load_balance() success count on cpu busy", "%11u", + busy_lb_count, busy_lb_balanced, busy_lb_failed, v17); +#endif +#ifdef DERIVED_AVG_FIELD +DERIVED_AVG_FIELD(busy_lb_avg_pulled, + "avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf", + busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v17); +#endif +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category idle> "); +#endif +DOMAIN_FIELD(__u32, idle_lb_count, + "load_balance() count on cpu idle", "%11u", true, v17); 
+DOMAIN_FIELD(__u32, idle_lb_balanced, + "load_balance() found balanced on cpu idle", "%11u", true, v17); +DOMAIN_FIELD(__u32, idle_lb_failed, + "load_balance() move task failed on cpu idle", "%11u", true, v17); +DOMAIN_FIELD(__u32, idle_lb_imbalance_load, + "imbalance in load on cpu idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, idle_lb_imbalance_util, + "imbalance in utilization on cpu idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, idle_lb_imbalance_task, + "imbalance in number of tasks on cpu idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, idle_lb_imbalance_misfit, + "imbalance in misfit tasks on cpu idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, idle_lb_gained, + "pull_task() count on cpu idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, idle_lb_hot_gained, + "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, idle_lb_nobusyq, + "load_balance() failed to find busier queue on cpu idle", "%11u", true, v17); +DOMAIN_FIELD(__u32, idle_lb_nobusyg, + "load_balance() failed to find busier group on cpu idle", "%11u", true, v17); +#ifdef DERIVED_CNT_FIELD +DERIVED_CNT_FIELD(idle_lb_success_count, "load_balance() success count on cpu idle", "%11u", + idle_lb_count, idle_lb_balanced, idle_lb_failed, v17); +#endif +#ifdef DERIVED_AVG_FIELD +DERIVED_AVG_FIELD(idle_lb_avg_pulled, + "avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf", + idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v17); +#endif +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category newidle> "); +#endif +DOMAIN_FIELD(__u32, newidle_lb_count, + "load_balance() count on cpu newly idle", "%11u", true, v17); +DOMAIN_FIELD(__u32, newidle_lb_balanced, + "load_balance() found balanced on cpu newly idle", "%11u", true, v17); +DOMAIN_FIELD(__u32, newidle_lb_failed, + "load_balance() move task failed on cpu newly idle", "%11u", true, v17); +DOMAIN_FIELD(__u32, newidle_lb_imbalance_load, + "imbalance in load on cpu newly idle", "%11u", 
false, v17); +DOMAIN_FIELD(__u32, newidle_lb_imbalance_util, + "imbalance in utilization on cpu newly idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, newidle_lb_imbalance_task, + "imbalance in number of tasks on cpu newly idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, newidle_lb_imbalance_misfit, + "imbalance in misfit tasks on cpu newly idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, newidle_lb_gained, + "pull_task() count on cpu newly idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, newidle_lb_hot_gained, + "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v17); +DOMAIN_FIELD(__u32, newidle_lb_nobusyq, + "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v17); +DOMAIN_FIELD(__u32, newidle_lb_nobusyg, + "load_balance() failed to find busier group on cpu newly idle", "%11u", true, v17); +#ifdef DERIVED_CNT_FIELD +DERIVED_CNT_FIELD(newidle_lb_success_count, + "load_balance() success count on cpu newly idle", "%11u", + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v17); +#endif +#ifdef DERIVED_AVG_FIELD +DERIVED_AVG_FIELD(newidle_lb_avg_pulled, + "avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf", + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v17); +#endif +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category active_load_balance()> "); +#endif +DOMAIN_FIELD(__u32, alb_count, + "active_load_balance() count", "%11u", false, v17); +DOMAIN_FIELD(__u32, alb_failed, + "active_load_balance() move task failed", "%11u", false, v17); +DOMAIN_FIELD(__u32, alb_pushed, + "active_load_balance() successfully moved a task", "%11u", false, v17); +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category sched_balance_exec()> "); +#endif +DOMAIN_FIELD(__u32, sbe_count, + "sbe_count is not used", "%11u", false, v17); +DOMAIN_FIELD(__u32, sbe_balanced, + "sbe_balanced is not used", "%11u", false, v17); +DOMAIN_FIELD(__u32, sbe_pushed, + "sbe_pushed is not used", "%11u", 
false, v17); +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Category sched_balance_fork()> "); +#endif +DOMAIN_FIELD(__u32, sbf_count, + "sbf_count is not used", "%11u", false, v17); +DOMAIN_FIELD(__u32, sbf_balanced, + "sbf_balanced is not used", "%11u", false, v17); +DOMAIN_FIELD(__u32, sbf_pushed, + "sbf_pushed is not used", "%11u", false, v17); +#ifdef DOMAIN_CATEGORY +DOMAIN_CATEGORY(" <Wakeup Info> "); +#endif +DOMAIN_FIELD(__u32, ttwu_wake_remote, + "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v17); +DOMAIN_FIELD(__u32, ttwu_move_affine, + "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v17); +DOMAIN_FIELD(__u32, ttwu_move_balance, + "try_to_wake_up() started passive balancing", "%11u", false, v17); +#endif /* DOMAIN_FIELD */ diff --git a/tools/lib/python/__init__.py b/tools/lib/python/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/lib/python/__init__.py diff --git a/tools/lib/python/abi/__init__.py b/tools/lib/python/abi/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/lib/python/abi/__init__.py diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py new file mode 100644 index 000000000000..d7bb20ef3acc --- /dev/null +++ b/tools/lib/python/abi/abi_parser.py @@ -0,0 +1,631 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. +""" + +from argparse import Namespace +import logging +import os +import re + +from pprint import pformat +from random import randrange, seed + +# Import Python modules + +from abi.helpers import AbiDebug, ABI_DIR + + +class AbiParser: + """Main class to parse ABI files.""" + + #: Valid tags at Documentation/ABI. 
+ TAGS = r"(what|where|date|kernelversion|contact|description|users)" + + #: ABI elements that will auto-generate cross-references. + XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)" + + def __init__(self, directory, logger=None, + enable_lineno=False, show_warnings=True, debug=0): + """Stores arguments for the class and initialize class vars.""" + + self.directory = directory + self.enable_lineno = enable_lineno + self.show_warnings = show_warnings + self.debug = debug + + if not logger: + self.log = logging.getLogger("get_abi") + else: + self.log = logger + + self.data = {} + self.what_symbols = {} + self.file_refs = {} + self.what_refs = {} + + # Ignore files that contain such suffixes + self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~") + + # Regular expressions used on parser + self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR) + self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I) + self.re_valid = re.compile(self.TAGS) + self.re_start_spc = re.compile(r"(\s*)(\S.*)") + self.re_whitespace = re.compile(r"^\s+") + + # Regular expressions used on print + self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})") + self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])") + self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)") + self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n") + self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst") + self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)") + self.re_xref_node = re.compile(self.XREF) + + def warn(self, fdata, msg, extra=None): + """Displays a parse error if warning is enabled.""" + + if not self.show_warnings: + return + + msg = f"{fdata.fname}:{fdata.ln}: {msg}" + if extra: + msg += "\n\t\t" + extra + + self.log.warning(msg) + + def add_symbol(self, what, fname, ln=None, xref=None): + """Create a reference table describing where each 'what' is located.""" + + if what not in self.what_symbols: + 
self.what_symbols[what] = {"file": {}} + + if fname not in self.what_symbols[what]["file"]: + self.what_symbols[what]["file"][fname] = [] + + if ln and ln not in self.what_symbols[what]["file"][fname]: + self.what_symbols[what]["file"][fname].append(ln) + + if xref: + self.what_symbols[what]["xref"] = xref + + def _parse_line(self, fdata, line): + """Parse a single line of an ABI file.""" + + new_what = False + new_tag = False + content = None + + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + sep = match.group(2) + content = match.group(3) + + match = self.re_valid.search(new) + if match: + new_tag = match.group(1) + else: + if fdata.tag == "description": + # New "tag" is actually part of description. + # Don't consider it a tag + new_tag = False + elif fdata.tag != "": + self.warn(fdata, f"tag '{fdata.tag}' is invalid", line) + + if new_tag: + # "where" is Invalid, but was a common mistake. Warn if found + if new_tag == "where": + self.warn(fdata, "tag 'Where' is invalid. 
Should be 'What:' instead") + new_tag = "what" + + if new_tag == "what": + fdata.space = None + + if content not in self.what_symbols: + self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln) + + if fdata.tag == "what": + fdata.what.append(content.strip("\n")) + else: + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fdata.fname, + ln=fdata.what_ln, xref=fdata.key) + + fdata.label = content + new_what = True + + key = "abi_" + content.lower() + fdata.key = self.re_unprintable.sub("_", key).strip("_") + + # Avoid duplicated keys but using a defined seed, to make + # the namespace identical if there aren't changes at the + # ABI symbols + seed(42) + + while fdata.key in self.data: + char = randrange(0, 51) + ord("A") + if char > ord("Z"): + char += ord("a") - ord("Z") - 1 + + fdata.key += chr(char) + + if fdata.key and fdata.key not in self.data: + self.data[fdata.key] = { + "what": [content], + "file": [fdata.file_ref], + "path": fdata.ftype, + "line_no": fdata.ln, + } + + fdata.what = self.data[fdata.key]["what"] + + self.what_refs[content] = fdata.key + fdata.tag = new_tag + fdata.what_ln = fdata.ln + + if fdata.nametag["what"]: + t = (content, fdata.key) + if t not in fdata.nametag["symbols"]: + fdata.nametag["symbols"].append(t) + + return + + if fdata.tag and new_tag: + fdata.tag = new_tag + + if new_what: + fdata.label = "" + + if "description" in self.data[fdata.key]: + self.data[fdata.key]["description"] += "\n\n" + + if fdata.file_ref not in self.data[fdata.key]["file"]: + self.data[fdata.key]["file"].append(fdata.file_ref) + + if self.debug == AbiDebug.WHAT_PARSING: + self.log.debug("what: %s", fdata.what) + + if not fdata.what: + self.warn(fdata, "'What:' should come first:", line) + return + + if new_tag == "description": + fdata.space = None + + if content: + sep = sep.replace(":", " ") + + c = " " * 
len(new_tag) + sep + content + c = c.expandtabs() + + match = self.re_start_spc.match(c) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + content = match.group(2) + "\n" + + self.data[fdata.key][fdata.tag] = content + + return + + # Store any contents before tags at the database + if not fdata.tag and "what" in fdata.nametag: + fdata.nametag["description"] += line + return + + if fdata.tag == "description": + content = line.expandtabs() + + if self.re_whitespace.sub("", content) == "": + self.data[fdata.key][fdata.tag] += "\n" + return + + if fdata.space is None: + match = self.re_start_spc.match(content) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + + content = match.group(2) + "\n" + else: + if content.startswith(fdata.space): + content = content[len(fdata.space):] + + else: + fdata.space = "" + + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += content + return + + content = line.strip() + if fdata.tag: + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n") + return + + # Everything else is error + if content: + self.warn(fdata, "Unexpected content", line) + + def parse_readme(self, nametag, fname): + """Parse ABI README file.""" + + nametag["what"] = ["Introduction"] + nametag["path"] = "README" + with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + + match = self.re_valid.search(new) + if match: + nametag["description"] += "\n:" + line + continue + + nametag["description"] += line + + def parse_file(self, fname, path, basename): + """Parse a single file.""" + + ref = f"abi_file_{path}_{basename}" + ref = self.re_unprintable.sub("_", 
ref).strip("_") + + # Store per-file state into a namespace variable. This will be used + # by the per-line parser state machine and by the warning function. + fdata = Namespace + + fdata.fname = fname + fdata.name = basename + + pos = fname.find(ABI_DIR) + if pos > 0: + f = fname[pos:] + else: + f = fname + + fdata.file_ref = (f, ref) + self.file_refs[f] = ref + + fdata.ln = 0 + fdata.what_ln = 0 + fdata.tag = "" + fdata.label = "" + fdata.what = [] + fdata.key = None + fdata.xrefs = None + fdata.space = None + fdata.ftype = path.split("/")[0] + + fdata.nametag = {} + fdata.nametag["what"] = [f"ABI file {path}/{basename}"] + fdata.nametag["type"] = "File" + fdata.nametag["path"] = fdata.ftype + fdata.nametag["file"] = [fdata.file_ref] + fdata.nametag["line_no"] = 1 + fdata.nametag["description"] = "" + fdata.nametag["symbols"] = [] + + self.data[ref] = fdata.nametag + + if self.debug & AbiDebug.WHAT_OPEN: + self.log.debug("Opening file %s", fname) + + if basename == "README": + self.parse_readme(fdata.nametag, fname) + return + + with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + fdata.ln += 1 + + self._parse_line(fdata, line) + + if "description" in fdata.nametag: + fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n") + + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fname, xref=fdata.key) + + def _parse_abi(self, root=None): + """Internal function to parse documentation ABI recursively.""" + + if not root: + root = self.directory + + with os.scandir(root) as obj: + for entry in obj: + name = os.path.join(root, entry.name) + + if entry.is_dir(): + self._parse_abi(name) + continue + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if basename.startswith("."): + continue + + if basename.endswith(self.ignore_suffixes): + continue + + 
path = self.re_abi_dir.sub("", os.path.dirname(name)) + + self.parse_file(name, path, basename) + + def parse_abi(self, root=None): + """Parse documentation ABI.""" + + self._parse_abi(root) + + if self.debug & AbiDebug.DUMP_ABI_STRUCTS: + self.log.debug(pformat(self.data)) + + def desc_txt(self, desc): + """Print description as found inside ABI files.""" + + desc = desc.strip(" \t\n") + + return desc + "\n\n" + + def xref(self, fname): + """ + Converts a Documentation/ABI + basename into a ReST cross-reference. + """ + + xref = self.file_refs.get(fname) + if not xref: + return None + else: + return xref + + def desc_rst(self, desc): + """Enrich ReST output by creating cross-references.""" + + # Remove title markups from the description + # Having titles inside ABI files will only work if extra + # care would be taken in order to strictly follow the same + # level order for each markup. + desc = self.re_title_mark.sub("\n\n", "\n" + desc) + desc = desc.rstrip(" \t\n").lstrip("\n") + + # Python's regex performance for non-compiled expressions is a lot + # than Perl, as Perl automatically caches them at their + # first usage. Here, we'll need to do the same, as otherwise the + # performance penalty is be high + + new_desc = "" + for d in desc.split("\n"): + if d == "": + new_desc += "\n" + continue + + # Use cross-references for doc files where needed + d = self.re_doc.sub(r":doc:`/\1`", d) + + # Use cross-references for ABI generated docs where needed + matches = self.re_abi.findall(d) + for m in matches: + abi = m[0] + m[1] + + xref = self.file_refs.get(abi) + if not xref: + # This may happen if ABI is on a separate directory, + # like parsing ABI testing and symbol is at stable. 
+ # The proper solution is to move this part of the code + # for it to be inside sphinx/kernel_abi.py + self.log.info("Didn't find ABI reference for '%s'", abi) + else: + new = self.re_escape.sub(r"\\\1", m[1]) + d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d) + + # Seek for cross reference symbols like /sys/... + # Need to be careful to avoid doing it on a code block + if d[0] not in [" ", "\t"]: + matches = self.re_xref_node.findall(d) + for m in matches: + # Finding ABI here is more complex due to wildcards + xref = self.what_refs.get(m) + if xref: + new = self.re_escape.sub(r"\\\1", m) + d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d) + + new_desc += d + "\n" + + return new_desc + "\n\n" + + def doc(self, output_in_txt=False, show_symbols=True, show_file=True, + filter_path=None): + """Print ABI at stdout.""" + + part = None + for key, v in sorted(self.data.items(), + key=lambda x: (x[1].get("type", ""), + x[1].get("what"))): + + wtype = v.get("type", "Symbol") + file_ref = v.get("file") + names = v.get("what", [""]) + + if wtype == "File": + if not show_file: + continue + else: + if not show_symbols: + continue + + if filter_path: + if v.get("path") != filter_path: + continue + + msg = "" + + if wtype != "File": + cur_part = names[0] + if cur_part.find("/") >= 0: + match = self.re_what.match(cur_part) + if match: + symbol = match.group(1).rstrip("/") + cur_part = "Symbols under " + symbol + + if cur_part and cur_part != part: + part = cur_part + msg += part + "\n"+ "-" * len(part) +"\n\n" + + msg += f".. _{key}:\n\n" + + max_len = 0 + for i in range(0, len(names)): # pylint: disable=C0200 + names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**" + + max_len = max(max_len, len(names[i])) + + msg += "+-" + "-" * max_len + "-+\n" + for name in names: + msg += f"| {name}" + " " * (max_len - len(name)) + " |\n" + msg += "+-" + "-" * max_len + "-+\n" + msg += "\n" + + for ref in file_ref: + if wtype == "File": + msg += f".. 
_{ref[1]}:\n\n" + else: + base = os.path.basename(ref[0]) + msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n" + + if wtype == "File": + msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n" + + desc = v.get("description") + if not desc and wtype != "File": + msg += f"DESCRIPTION MISSING for {names[0]}\n\n" + + if desc: + if output_in_txt: + msg += self.desc_txt(desc) + else: + msg += self.desc_rst(desc) + + symbols = v.get("symbols") + if symbols: + msg += "Has the following ABI:\n\n" + + for w, label in symbols: + # Escape special chars from content + content = self.re_escape.sub(r"\\\1", w) + + msg += f"- :ref:`{content} <{label}>`\n\n" + + users = v.get("users") + if users and users.strip(" \t\n"): + users = users.strip("\n").replace('\n', '\n\t') + msg += f"Users:\n\t{users}\n\n" + + ln = v.get("line_no", 1) + + yield (msg, file_ref[0][0], ln) + + def check_issues(self): + """Warn about duplicated ABI entries.""" + + for what, v in self.what_symbols.items(): + files = v.get("file") + if not files: + # Should never happen if the parser works properly + self.log.warning("%s doesn't have a file associated", what) + continue + + if len(files) == 1: + continue + + f = [] + for fname, lines in sorted(files.items()): + if not lines: + f.append(f"{fname}") + elif len(lines) == 1: + f.append(f"{fname}:{lines[0]}") + else: + m = fname + "lines " + m += ", ".join(str(x) for x in lines) + f.append(m) + + self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f)) + + def search_symbols(self, expr): + """ Searches for ABI symbols.""" + + regex = re.compile(expr, re.I) + + found_keys = 0 + for t in sorted(self.data.items(), key=lambda x: [0]): + v = t[1] + + wtype = v.get("type", "") + if wtype == "File": + continue + + for what in v.get("what", [""]): + if regex.search(what): + found_keys += 1 + + kernelversion = v.get("kernelversion", "").strip(" \t\n") + date = v.get("date", "").strip(" \t\n") + contact = v.get("contact", "").strip(" \t\n") + users = 
v.get("users", "").strip(" \t\n") + desc = v.get("description", "").strip(" \t\n") + + files = [] + for f in v.get("file", ()): + files.append(f[0]) + + what = str(found_keys) + ". " + what + title_tag = "-" * len(what) + + print(f"\n{what}\n{title_tag}\n") + + if kernelversion: + print(f"Kernel version:\t\t{kernelversion}") + + if date: + print(f"Date:\t\t\t{date}") + + if contact: + print(f"Contact:\t\t{contact}") + + if users: + print(f"Users:\t\t\t{users}") + + print("Defined on file(s):\t" + ", ".join(files)) + + if desc: + desc = desc.strip("\n") + print(f"\n{desc}\n") + + if not found_keys: + print(f"Regular expression /{expr}/ not found.") diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py new file mode 100644 index 000000000000..d0c5e3ede6b5 --- /dev/null +++ b/tools/lib/python/abi/abi_regex.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +# xxpylint: disable=R0903 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# SPDX-License-Identifier: GPL-2.0 + +""" +Convert ABI what into regular expressions +""" + +import re +import sys + +from pprint import pformat + +from abi.abi_parser import AbiParser +from abi.helpers import AbiDebug + +class AbiRegex(AbiParser): + """ + Extends AbiParser to search ABI nodes with regular expressions. + + There some optimizations here to allow a quick symbol search: + instead of trying to place all symbols altogether an doing linear + search which is very time consuming, create a tree with one depth, + grouping similar symbols altogether. + + Yet, sometimes a full search will be needed, so we have a special branch + on such group tree where other symbols are placed. + """ + + #: Escape only ASCII visible characters. + escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])" + + #: Special group for other nodes. 
+ leave_others = "others" + + # Tuples with regular expressions to be compiled and replacement data + re_whats = [ + # Drop escape characters that might exist + (re.compile("\\\\"), ""), + + # Temporarily escape dot characters + (re.compile(r"\."), "\xf6"), + + # Temporarily change [0-9]+ type of patterns + (re.compile(r"\[0\-9\]\+"), "\xff"), + + # Temporarily change [\d+-\d+] type of patterns + (re.compile(r"\[0\-\d+\]"), "\xff"), + (re.compile(r"\[0:\d+\]"), "\xff"), + (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"), + + # Temporarily change [0-9] type of patterns + (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"), + + # Handle multiple option patterns + (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"), + + # Handle wildcards + (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"), + (re.compile(r"/\*/"), "/.*/"), + (re.compile(r"/\xf6\xf6\xf6"), "/.*"), + (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"), + (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"), + (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"), + + (re.compile(r"XX+"), "\\\\w\xf7"), + (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"), + (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"), + (re.compile(r"_[AB]_"), "_\\\\w\xf7_"), + + # Recover [0-9] type of patterns + (re.compile(r"\xf4"), "["), + (re.compile(r"\xf5"), "]"), + + # Remove duplicated spaces + (re.compile(r"\s+"), r" "), + + # Special case: drop comparison as in: + # What: foo = <something> + # (this happens on a few IIO definitions) + (re.compile(r"\s*\=.*$"), ""), + + # Escape all other symbols + (re.compile(escape_symbols), r"\\\1"), + (re.compile(r"\\\\"), r"\\"), + (re.compile(r"\\([\[\]\(\)\|])"), r"\1"), + (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"), + + (re.compile(r"\xff"), r"\\d+"), + + # Special case: IIO ABI which a parenthesis. 
+ (re.compile(r"sqrt(.*)"), r"sqrt(.*)"), + + # Simplify regexes with multiple .* + (re.compile(r"(?:\.\*){2,}"), ""), + + # Recover dot characters + (re.compile(r"\xf6"), "\\."), + # Recover plus characters + (re.compile(r"\xf7"), "+"), + ] + + #: Regex to check if the symbol name has a number on it. + re_has_num = re.compile(r"\\d") + + #: Symbol name after escape_chars that are considered a devnode basename. + re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$") + + #: List of popular group names to be skipped to minimize regex group size + #: Use AbiDebug.SUBGROUP_SIZE to detect those. + skip_names = set(["devices", "hwmon"]) + + def regex_append(self, what, new): + """ + Get a search group for a subset of regular expressions. + + As ABI may have thousands of symbols, using a for to search all + regular expressions is at least O(n^2). When there are wildcards, + the complexity increases substantially, eventually becoming exponential. + + To avoid spending too much time on them, use a logic to split + them into groups. The smaller the group, the better, as it would + mean that searches will be confined to a small number of regular + expressions. + + The conversion to a regex subset is tricky, as we need something + that can be easily obtained from the sysfs symbol and from the + regular expression. So, we need to discard nodes that have + wildcards. + + If it can't obtain a subgroup, place the regular expression inside + a special group (self.leave_others). 
+ """ + + search_group = None + + for search_group in reversed(new.split("/")): + if not search_group or search_group in self.skip_names: + continue + if self.re_symbol_name.match(search_group): + break + + if not search_group: + search_group = self.leave_others + + if self.debug & AbiDebug.SUBGROUP_MAP: + self.log.debug("%s: mapped as %s", what, search_group) + + try: + if search_group not in self.regex_group: + self.regex_group[search_group] = [] + + self.regex_group[search_group].append(re.compile(new)) + if self.search_string: + if what.find(self.search_string) >= 0: + print(f"What: {what}") + except re.PatternError: + self.log.warning("Ignoring '%s' as it produced an invalid regex:\n" + " '%s'", what, new) + + def get_regexes(self, what): + """ + Given an ABI devnode, return a list of all regular expressions that + may match it, based on the sub-groups created by regex_append(). + """ + + re_list = [] + + patches = what.split("/") + patches.reverse() + patches.append(self.leave_others) + + for search_group in patches: + if search_group in self.regex_group: + re_list += self.regex_group[search_group] + + return re_list + + def __init__(self, *args, **kwargs): + """ + Override init method to get verbose argument + """ + + self.regex_group = None + self.search_string = None + self.re_string = None + + if "search_string" in kwargs: + self.search_string = kwargs.get("search_string") + del kwargs["search_string"] + + if self.search_string: + + try: + self.re_string = re.compile(self.search_string) + except re.PatternError as e: + msg = f"{self.search_string} is not a valid regular expression" + raise ValueError(msg) from e + + super().__init__(*args, **kwargs) + + def parse_abi(self, *args, **kwargs): + + super().parse_abi(*args, **kwargs) + + self.regex_group = {} + + print("Converting ABI What fields into regexes...", file=sys.stderr) + + for t in sorted(self.data.items(), key=lambda x: x[0]): + v = t[1] + if v.get("type") == "File": + continue + + v["regex"] = [] 
+ + for what in v.get("what", []): + if not what.startswith("/sys"): + continue + + new = what + for r, s in self.re_whats: + try: + new = r.sub(s, new) + except re.PatternError as e: + # Help debugging troubles with new regexes + raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e + + v["regex"].append(new) + + if self.debug & AbiDebug.REGEX: + self.log.debug("%-90s <== %s", new, what) + + # Store regex into a subgroup to speedup searches + self.regex_append(what, new) + + if self.debug & AbiDebug.SUBGROUP_DICT: + self.log.debug("%s", pformat(self.regex_group)) + + if self.debug & AbiDebug.SUBGROUP_SIZE: + biggestd_keys = sorted(self.regex_group.keys(), + key= lambda k: len(self.regex_group[k]), + reverse=True) + + print("Top regex subgroups:", file=sys.stderr) + for k in biggestd_keys[:10]: + print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr) diff --git a/tools/lib/python/abi/helpers.py b/tools/lib/python/abi/helpers.py new file mode 100644 index 000000000000..2a378d780d3c --- /dev/null +++ b/tools/lib/python/abi/helpers.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# pylint: disable=R0903 +# SPDX-License-Identifier: GPL-2.0 + +""" +Helper classes for ABI parser +""" + +ABI_DIR = "Documentation/ABI/" + + +class AbiDebug: + """Debug levels""" + + WHAT_PARSING = 1 #: Enable debug parsing logic. + WHAT_OPEN = 2 #: Enable debug messages on file open. + DUMP_ABI_STRUCTS = 4 #: Enable debug for ABI parse data. + UNDEFINED = 8 #: Enable extra undefined symbol data. + REGEX = 16 #: Enable debug for what to regex conversion. + SUBGROUP_MAP = 32 #: Enable debug for symbol regex subgroups + SUBGROUP_DICT = 64 #: Enable debug for sysfs graph tree variable. + SUBGROUP_SIZE = 128 #: Enable debug of search groups. + GRAPH = 256 #: Display ref tree graph for undefined symbols. 
+ +#: Helper messages for each debug variable +DEBUG_HELP = """ +1 - enable debug parsing logic +2 - enable debug messages on file open +4 - enable debug for ABI parse data +8 - enable extra debug information to identify troubles + with ABI symbols found at the local machine that + weren't found on ABI documentation (used only for + undefined subcommand) +16 - enable debug for what to regex conversion +32 - enable debug for symbol regex subgroups +64 - enable debug for sysfs graph tree variable +128 - enable debug of search groups +256 - enable displaying refrence tree graphs for undefined symbols. +""" diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py new file mode 100644 index 000000000000..7bbefd274ea2 --- /dev/null +++ b/tools/lib/python/abi/system_symbols.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0912,R0914,R0915,R1702 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. +""" + +import os +import re +import sys + +from concurrent import futures +from datetime import datetime +from random import shuffle + +from abi.helpers import AbiDebug + +class SystemSymbols: + """Stores arguments for the class and initialize class vars.""" + + def graph_add_file(self, path, link=None): + """ + add a file path to the sysfs graph stored at self.root. 
+ """ + + if path in self.files: + return + + name = "" + ref = self.root + for edge in path.split("/"): + name += edge + "/" + if edge not in ref: + ref[edge] = {"__name": [name.rstrip("/")]} + + ref = ref[edge] + + if link and link not in ref["__name"]: + ref["__name"].append(link.rstrip("/")) + + self.files.add(path) + + def print_graph(self, root_prefix="", root=None, level=0): + """Prints a reference tree graph using UTF-8 characters.""" + + if not root: + root = self.root + level = 0 + + # Prevent endless traverse + if level > 5: + return + + if level > 0: + prefix = "├──" + last_prefix = "└──" + else: + prefix = "" + last_prefix = "" + + items = list(root.items()) + + names = root.get("__name", []) + for k, edge in items: + if k == "__name": + continue + + if not k: + k = "/" + + if len(names) > 1: + k += " links: " + ",".join(names[1:]) + + if edge == items[-1][1]: + print(root_prefix + last_prefix + k) + p = root_prefix + if level > 0: + p += " " + self.print_graph(p, edge, level + 1) + else: + print(root_prefix + prefix + k) + p = root_prefix + "│ " + self.print_graph(p, edge, level + 1) + + def _walk(self, root): + """ + Walk through sysfs to get all devnodes that aren't ignored. + + By default, uses /sys as sysfs mounting point. If another + directory is used, it replaces them to /sys at the patches. 
+ """ + + with os.scandir(root) as obj: + for entry in obj: + path = os.path.join(root, entry.name) + if self.sysfs: + p = path.replace(self.sysfs, "/sys", count=1) + else: + p = path + + if self.re_ignore.search(p): + return + + # Handle link first to avoid directory recursion + if entry.is_symlink(): + real = os.path.realpath(path) + if not self.sysfs: + self.aliases[path] = real + else: + real = real.replace(self.sysfs, "/sys", count=1) + + # Add absfile location to graph if it doesn't exist + if not self.re_ignore.search(real): + # Add link to the graph + self.graph_add_file(real, p) + + elif entry.is_file(): + self.graph_add_file(p) + + elif entry.is_dir(): + self._walk(path) + + def __init__(self, abi, sysfs="/sys", hints=False): + """ + Initialize internal variables and get a list of all files inside + sysfs that can currently be parsed. + + Please notice that there are several entries on sysfs that aren't + documented as ABI. Ignore those. + + The real paths will be stored under self.files. Aliases will be + stored in separate, as self.aliases. 
+ """ + + self.abi = abi + self.log = abi.log + + if sysfs != "/sys": + self.sysfs = sysfs.rstrip("/") + else: + self.sysfs = None + + self.hints = hints + + self.root = {} + self.aliases = {} + self.files = set() + + dont_walk = [ + # Those require root access and aren't documented at ABI + f"^{sysfs}/kernel/debug", + f"^{sysfs}/kernel/tracing", + f"^{sysfs}/fs/pstore", + f"^{sysfs}/fs/bpf", + f"^{sysfs}/fs/fuse", + + # This is not documented at ABI + f"^{sysfs}/module", + + f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI + f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings + "sections|notes", # aren't actually part of ABI + + # kernel-parameters.txt - not easy to parse + "parameters", + ] + + self.re_ignore = re.compile("|".join(dont_walk)) + + print(f"Reading {sysfs} directory contents...", file=sys.stderr) + self._walk(sysfs) + + def check_file(self, refs, found): + """Check missing ABI symbols for a given sysfs file.""" + + res_list = [] + + try: + for names in refs: + fname = names[0] + + res = { + "found": False, + "fname": fname, + "msg": "", + } + res_list.append(res) + + re_what = self.abi.get_regexes(fname) + if not re_what: + self.abi.log.warning(f"missing rules for {fname}") + continue + + for name in names: + for r in re_what: + if self.abi.debug & AbiDebug.UNDEFINED: + self.log.debug("check if %s matches '%s'", name, r.pattern) + if r.match(name): + res["found"] = True + if found: + res["msg"] += f" {fname}: regex:\n\t" + continue + + if self.hints and not res["found"]: + res["msg"] += f" {fname} not found. 
Tested regexes:\n" + for r in re_what: + res["msg"] += " " + r.pattern + "\n" + + except KeyboardInterrupt: + pass + + return res_list + + def _ref_interactor(self, root): + """Recursive function to interact over the sysfs tree.""" + + for k, v in root.items(): + if isinstance(v, dict): + yield from self._ref_interactor(v) + + if root == self.root or k == "__name": + continue + + if self.abi.re_string: + fname = v["__name"][0] + if self.abi.re_string.search(fname): + yield v + else: + yield v + + + def get_fileref(self, all_refs, chunk_size): + """Interactor to group refs into chunks.""" + + n = 0 + refs = [] + + for ref in all_refs: + refs.append(ref) + + n += 1 + if n >= chunk_size: + yield refs + n = 0 + refs = [] + + yield refs + + def check_undefined_symbols(self, max_workers=None, chunk_size=50, + found=None, dry_run=None): + """Seach ABI for sysfs symbols missing documentation.""" + + self.abi.parse_abi() + + if self.abi.debug & AbiDebug.GRAPH: + self.print_graph() + + all_refs = [] + for ref in self._ref_interactor(self.root): + all_refs.append(ref["__name"]) + + if dry_run: + print("Would check", file=sys.stderr) + for ref in all_refs: + print(", ".join(ref)) + + return + + print("Starting to search symbols (it may take several minutes):", + file=sys.stderr) + start = datetime.now() + old_elapsed = None + + # Python doesn't support multithreading due to limitations on its + # global lock (GIL). While Python 3.13 finally made GIL optional, + # there are still issues related to it. Also, we want to have + # backward compatibility with older versions of Python. + # + # So, use instead multiprocess. However, Python is very slow passing + # data from/to multiple processes. Also, it may consume lots of memory + # if the data to be shared is not small. So, we need to group workload + # in chunks that are big enough to generate performance gains while + # not being so big that would cause out-of-memory. 
+ + num_refs = len(all_refs) + print(f"Number of references to parse: {num_refs}", file=sys.stderr) + + if not max_workers: + max_workers = os.cpu_count() + elif max_workers > os.cpu_count(): + max_workers = os.cpu_count() + + max_workers = max(max_workers, 1) + + max_chunk_size = int((num_refs + max_workers - 1) / max_workers) + chunk_size = min(chunk_size, max_chunk_size) + chunk_size = max(1, chunk_size) + + if max_workers > 1: + executor = futures.ProcessPoolExecutor + + # Place references in a random order. This may help improving + # performance, by mixing complex/simple expressions when creating + # chunks + shuffle(all_refs) + else: + # Python has a high overhead with processes. When there's just + # one worker, it is faster to not create a new process. + # Yet, User still deserves to have a progress print. So, use + # python's "thread", which is actually a single process, using + # an internal schedule to switch between tasks. No performance + # gains for non-IO tasks, but still it can be quickly interrupted + # from time to time to display progress. 
+ executor = futures.ThreadPoolExecutor + + not_found = [] + f_list = [] + with executor(max_workers=max_workers) as exe: + for refs in self.get_fileref(all_refs, chunk_size): + if refs: + try: + f_list.append(exe.submit(self.check_file, refs, found)) + + except KeyboardInterrupt: + return + + total = len(f_list) + + if not total: + if self.abi.re_string: + print(f"No ABI symbol matches {self.abi.search_string}") + else: + self.abi.log.warning("No ABI symbols found") + return + + print(f"{len(f_list):6d} jobs queued on {max_workers} workers", + file=sys.stderr) + + while f_list: + try: + t = futures.wait(f_list, timeout=1, + return_when=futures.FIRST_COMPLETED) + + done = t[0] + + for fut in done: + res_list = fut.result() + + for res in res_list: + if not res["found"]: + not_found.append(res["fname"]) + if res["msg"]: + print(res["msg"]) + + f_list.remove(fut) + except KeyboardInterrupt: + return + + except RuntimeError as e: + self.abi.log.warning(f"Future: {e}") + break + + if sys.stderr.isatty(): + elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] + if len(f_list) < total: + elapsed += f" ({total - len(f_list)}/{total} jobs completed). " + if elapsed != old_elapsed: + print(elapsed + "\r", end="", flush=True, + file=sys.stderr) + old_elapsed = elapsed + + elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] + print(elapsed, file=sys.stderr) + + for f in sorted(not_found): + print(f"{f} not found.") diff --git a/tools/lib/python/feat/parse_features.py b/tools/lib/python/feat/parse_features.py new file mode 100755 index 000000000000..41a51d9d6f62 --- /dev/null +++ b/tools/lib/python/feat/parse_features.py @@ -0,0 +1,507 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0911,R0912,R0914,R0915 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# SPDX-License-Identifier: GPL-2.0 + + +""" +Library to parse the Linux Feature files and produce a ReST book. 
+""" + +import os +import re +import sys + +from glob import iglob + + +class ParseFeature: + """ + Parses Documentation/features, allowing to generate ReST documentation + from it. + """ + + #: feature header string. + h_name = "Feature" + + #: Kernel config header string. + h_kconfig = "Kconfig" + + #: description header string. + h_description = "Description" + + #: subsystem header string. + h_subsys = "Subsystem" + + #: status header string. + h_status = "Status" + + #: architecture header string. + h_arch = "Architecture" + + #: Sort order for status. Others will be mapped at the end. + status_map = { + "ok": 0, + "TODO": 1, + "N/A": 2, + # The only missing status is "..", which was mapped as "---", + # as this is an special ReST cell value. Let it get the + # default order (99). + } + + def __init__(self, prefix, debug=0, enable_fname=False): + """ + Sets internal variables. + """ + + self.prefix = prefix + self.debug = debug + self.enable_fname = enable_fname + + self.data = {} + + # Initial maximum values use just the headers + self.max_size_name = len(self.h_name) + self.max_size_kconfig = len(self.h_kconfig) + self.max_size_description = len(self.h_description) + self.max_size_desc_word = 0 + self.max_size_subsys = len(self.h_subsys) + self.max_size_status = len(self.h_status) + self.max_size_arch = len(self.h_arch) + self.max_size_arch_with_header = self.max_size_arch + self.max_size_arch + self.description_size = 1 + + self.msg = "" + + def emit(self, msg="", end="\n"): + """Helper function to append a new message for feature output.""" + + self.msg += msg + end + + def parse_error(self, fname, ln, msg, data=None): + """ + Displays an error message, printing file name and line. 
+ """ + + if ln: + fname += f"#{ln}" + + print(f"Warning: file {fname}: {msg}", file=sys.stderr, end="") + + if data: + data = data.rstrip() + print(f":\n\t{data}", file=sys.stderr) + else: + print("", file=sys.stderr) + + def parse_feat_file(self, fname): + """Parses a single arch-support.txt feature file.""" + + if os.path.isdir(fname): + return + + base = os.path.basename(fname) + + if base != "arch-support.txt": + if self.debug: + print(f"ignoring {fname}", file=sys.stderr) + return + + subsys = os.path.dirname(fname).split("/")[-2] + self.max_size_subsys = max(self.max_size_subsys, len(subsys)) + + feature_name = "" + kconfig = "" + description = "" + comments = "" + arch_table = {} + + if self.debug > 1: + print(f"Opening {fname}", file=sys.stderr) + + if self.enable_fname: + full_fname = os.path.abspath(fname) + self.emit(f".. FILE {full_fname}") + + with open(fname, encoding="utf-8") as f: + for ln, line in enumerate(f, start=1): + line = line.strip() + + match = re.match(r"^\#\s+Feature\s+name:\s*(.*\S)", line) + if match: + feature_name = match.group(1) + + self.max_size_name = max(self.max_size_name, + len(feature_name)) + continue + + match = re.match(r"^\#\s+Kconfig:\s*(.*\S)", line) + if match: + kconfig = match.group(1) + + self.max_size_kconfig = max(self.max_size_kconfig, + len(kconfig)) + continue + + match = re.match(r"^\#\s+description:\s*(.*\S)", line) + if match: + description = match.group(1) + + self.max_size_description = max(self.max_size_description, + len(description)) + + words = re.split(r"\s+", line)[1:] + for word in words: + self.max_size_desc_word = max(self.max_size_desc_word, + len(word)) + + continue + + if re.search(r"^\\s*$", line): + continue + + if re.match(r"^\s*\-+\s*$", line): + continue + + if re.search(r"^\s*\|\s*arch\s*\|\s*status\s*\|\s*$", line): + continue + + match = re.match(r"^\#\s*(.*)$", line) + if match: + comments += match.group(1) + continue + + match = re.match(r"^\s*\|\s*(\S+):\s*\|\s*(\S+)\s*\|\s*$", 
line) + if match: + arch = match.group(1) + status = match.group(2) + + self.max_size_status = max(self.max_size_status, + len(status)) + self.max_size_arch = max(self.max_size_arch, len(arch)) + + if status == "..": + status = "---" + + arch_table[arch] = status + + continue + + self.parse_error(fname, ln, "Line is invalid", line) + + if not feature_name: + self.parse_error(fname, 0, "Feature name not found") + return + if not subsys: + self.parse_error(fname, 0, "Subsystem not found") + return + if not kconfig: + self.parse_error(fname, 0, "Kconfig not found") + return + if not description: + self.parse_error(fname, 0, "Description not found") + return + if not arch_table: + self.parse_error(fname, 0, "Architecture table not found") + return + + self.data[feature_name] = { + "where": fname, + "subsys": subsys, + "kconfig": kconfig, + "description": description, + "comments": comments, + "table": arch_table, + } + + self.max_size_arch_with_header = self.max_size_arch + len(self.h_arch) + + def parse(self): + """Parses all arch-support.txt feature files inside self.prefix.""" + + path = os.path.expanduser(self.prefix) + + if self.debug > 2: + print(f"Running parser for {path}") + + example_path = os.path.join(path, "arch-support.txt") + + for fname in iglob(os.path.join(path, "**"), recursive=True): + if fname != example_path: + self.parse_feat_file(fname) + + return self.data + + def output_arch_table(self, arch, feat=None): + """ + Output feature(s) for a given architecture. 
+ """ + + title = f"Feature status on {arch} architecture" + + self.emit("=" * len(title)) + self.emit(title) + self.emit("=" * len(title)) + self.emit() + + self.emit("=" * self.max_size_subsys + " ", end="") + self.emit("=" * self.max_size_name + " ", end="") + self.emit("=" * self.max_size_kconfig + " ", end="") + self.emit("=" * self.max_size_status + " ", end="") + self.emit("=" * self.max_size_description) + + self.emit(f"{self.h_subsys:<{self.max_size_subsys}} ", end="") + self.emit(f"{self.h_name:<{self.max_size_name}} ", end="") + self.emit(f"{self.h_kconfig:<{self.max_size_kconfig}} ", end="") + self.emit(f"{self.h_status:<{self.max_size_status}} ", end="") + self.emit(f"{self.h_description:<{self.max_size_description}}") + + self.emit("=" * self.max_size_subsys + " ", end="") + self.emit("=" * self.max_size_name + " ", end="") + self.emit("=" * self.max_size_kconfig + " ", end="") + self.emit("=" * self.max_size_status + " ", end="") + self.emit("=" * self.max_size_description) + + sorted_features = sorted(self.data.keys(), + key=lambda x: (self.data[x]["subsys"], + x.lower())) + + for name in sorted_features: + if feat and name != feat: + continue + + arch_table = self.data[name]["table"] + + if not arch in arch_table: + continue + + self.emit(f"{self.data[name]['subsys']:<{self.max_size_subsys}} ", + end="") + self.emit(f"{name:<{self.max_size_name}} ", end="") + self.emit(f"{self.data[name]['kconfig']:<{self.max_size_kconfig}} ", + end="") + self.emit(f"{arch_table[arch]:<{self.max_size_status}} ", + end="") + self.emit(f"{self.data[name]['description']}") + + self.emit("=" * self.max_size_subsys + " ", end="") + self.emit("=" * self.max_size_name + " ", end="") + self.emit("=" * self.max_size_kconfig + " ", end="") + self.emit("=" * self.max_size_status + " ", end="") + self.emit("=" * self.max_size_description) + + return self.msg + + def output_feature(self, feat): + """ + Output a feature on all architectures. 
+ """ + + title = f"Feature {feat}" + + self.emit("=" * len(title)) + self.emit(title) + self.emit("=" * len(title)) + self.emit() + + if not feat in self.data: + return + + if self.data[feat]["subsys"]: + self.emit(f":Subsystem: {self.data[feat]['subsys']}") + if self.data[feat]["kconfig"]: + self.emit(f":Kconfig: {self.data[feat]['kconfig']}") + + desc = self.data[feat]["description"] + desc = desc[0].upper() + desc[1:] + desc = desc.rstrip(". \t") + self.emit(f"\n{desc}.\n") + + com = self.data[feat]["comments"].strip() + if com: + self.emit("Comments") + self.emit("--------") + self.emit(f"\n{com}\n") + + self.emit("=" * self.max_size_arch + " ", end="") + self.emit("=" * self.max_size_status) + + self.emit(f"{self.h_arch:<{self.max_size_arch}} ", end="") + self.emit(f"{self.h_status:<{self.max_size_status}}") + + self.emit("=" * self.max_size_arch + " ", end="") + self.emit("=" * self.max_size_status) + + arch_table = self.data[feat]["table"] + for arch in sorted(arch_table.keys()): + self.emit(f"{arch:<{self.max_size_arch}} ", end="") + self.emit(f"{arch_table[arch]:<{self.max_size_status}}") + + self.emit("=" * self.max_size_arch + " ", end="") + self.emit("=" * self.max_size_status) + + return self.msg + + def matrix_lines(self, desc_size, max_size_status, header): + """ + Helper function to split element tables at the output matrix. + """ + + if header: + ln_marker = "=" + else: + ln_marker = "-" + + self.emit("+" + ln_marker * self.max_size_name + "+", end="") + self.emit(ln_marker * desc_size, end="") + self.emit("+" + ln_marker * max_size_status + "+") + + def output_matrix(self): + """ + Generates a set of tables, groped by subsystem, containing + what's the feature state on each architecture. 
+ """ + + title = "Feature status on all architectures" + + self.emit("=" * len(title)) + self.emit(title) + self.emit("=" * len(title)) + self.emit() + + desc_title = f"{self.h_kconfig} / {self.h_description}" + + desc_size = self.max_size_kconfig + 4 + if not self.description_size: + desc_size = max(self.max_size_description, desc_size) + else: + desc_size = max(self.description_size, desc_size) + + desc_size = max(self.max_size_desc_word, desc_size, len(desc_title)) + + notcompat = "Not compatible" + self.max_size_status = max(self.max_size_status, len(notcompat)) + + min_status_size = self.max_size_status + self.max_size_arch + 4 + max_size_status = max(min_status_size, self.max_size_status) + + h_status_per_arch = "Status per architecture" + max_size_status = max(max_size_status, len(h_status_per_arch)) + + cur_subsys = None + for name in sorted(self.data.keys(), + key=lambda x: (self.data[x]["subsys"], x.lower())): + if not cur_subsys or cur_subsys != self.data[name]["subsys"]: + if cur_subsys: + self.emit() + + cur_subsys = self.data[name]["subsys"] + + title = f"Subsystem: {cur_subsys}" + self.emit(title) + self.emit("=" * len(title)) + self.emit() + + self.matrix_lines(desc_size, max_size_status, 0) + + self.emit(f"|{self.h_name:<{self.max_size_name}}", end="") + self.emit(f"|{desc_title:<{desc_size}}", end="") + self.emit(f"|{h_status_per_arch:<{max_size_status}}|") + + self.matrix_lines(desc_size, max_size_status, 1) + + lines = [] + descs = [] + cur_status = "" + line = "" + + arch_table = sorted(self.data[name]["table"].items(), + key=lambda x: (self.status_map.get(x[1], 99), + x[0].lower())) + + for arch, status in arch_table: + if status == "---": + status = notcompat + + if status != cur_status: + if line != "": + lines.append(line) + line = "" + line = f"- **{status}**: {arch}" + elif len(line) + len(arch) + 2 < max_size_status: + line += f", {arch}" + else: + lines.append(line) + line = f" {arch}" + cur_status = status + + if line != "": + 
lines.append(line) + + description = self.data[name]["description"] + while len(description) > desc_size: + desc_line = description[:desc_size] + + last_space = desc_line.rfind(" ") + if last_space != -1: + desc_line = desc_line[:last_space] + descs.append(desc_line) + description = description[last_space + 1:] + else: + desc_line = desc_line[:-1] + descs.append(desc_line + "\\") + description = description[len(desc_line):] + + if description: + descs.append(description) + + while len(lines) < 2 + len(descs): + lines.append("") + + for ln, line in enumerate(lines): + col = ["", ""] + + if not ln: + col[0] = name + col[1] = f"``{self.data[name]['kconfig']}``" + else: + if ln >= 2 and descs: + col[1] = descs.pop(0) + + self.emit(f"|{col[0]:<{self.max_size_name}}", end="") + self.emit(f"|{col[1]:<{desc_size}}", end="") + self.emit(f"|{line:<{max_size_status}}|") + + self.matrix_lines(desc_size, max_size_status, 0) + + return self.msg + + def list_arch_features(self, arch, feat): + """ + Print a matrix of kernel feature support for the chosen architecture. 
+ """ + self.emit("#") + self.emit(f"# Kernel feature support matrix of the '{arch}' architecture:") + self.emit("#") + + # Sort by subsystem, then by feature name (case‑insensitive) + for name in sorted(self.data.keys(), + key=lambda n: (self.data[n]["subsys"].lower(), + n.lower())): + if feat and name != feat: + continue + + feature = self.data[name] + arch_table = feature["table"] + status = arch_table.get(arch, "") + status = " " * ((4 - len(status)) // 2) + status + + self.emit(f"{feature['subsys']:>{self.max_size_subsys + 1}}/ ", + end="") + self.emit(f"{name:<{self.max_size_name}}: ", end="") + self.emit(f"{status:<5}| ", end="") + self.emit(f"{feature['kconfig']:>{self.max_size_kconfig}} ", + end="") + self.emit(f"# {feature['description']}") + + return self.msg diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py new file mode 100755 index 000000000000..aba22c33393d --- /dev/null +++ b/tools/lib/python/jobserver.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0+ +# +# pylint: disable=C0103,C0209 +# +# + +""" +Interacts with the POSIX jobserver during the Kernel build time. + +A "normal" jobserver task, like the one initiated by a make subrocess would do: + + - open read/write file descriptors to communicate with the job server; + - ask for one slot by calling:: + + claim = os.read(reader, 1) + + - when the job finshes, call:: + + os.write(writer, b"+") # os.write(writer, claim) + +Here, the goal is different: This script aims to get the remaining number +of slots available, using all of them to run a command which handle tasks in +parallel. To to that, it has a loop that ends only after there are no +slots left. It then increments the number by one, in order to allow a +call equivalent to ``make -j$((claim+1))``, e.g. having a parent make creating +$claim child to do the actual work. 
+ +The end goal here is to keep the total number of build tasks under the +limit established by the initial ``make -j$n_proc`` call. + +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" + +import errno +import os +import subprocess +import sys + +def warn(text, *args): + print(f'WARNING: {text}', *args, file = sys.stderr) + +class JobserverExec: + """ + Claim all slots from make using POSIX Jobserver. + + The main methods here are: + + - open(): reserves all slots; + - close(): method returns all used slots back to make; + - run(): executes a command setting PARALLELISM=<available slots jobs + 1>. + """ + + def __init__(self): + """Initialize internal vars.""" + self.claim = 0 + self.jobs = b"" + self.reader = None + self.writer = None + self.is_open = False + + def open(self): + """Reserve all available slots to be claimed later on.""" + + if self.is_open: + return + self.is_open = True # We only try once + self.claim = None + # + # Check the make flags for "--jobserver=R,W" + # Note that GNU Make has used --jobserver-fds and --jobserver-auth + # so this handles all of them. + # + flags = os.environ.get('MAKEFLAGS', '') + opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] + if not opts: + return + # + # Separate out the provided file descriptors + # + split_opt = opts[-1].split('=', 1) + if len(split_opt) != 2: + warn('unparseable option:', opts[-1]) + return + fds = split_opt[1] + # + # As of GNU Make 4.4, we'll be looking for a named pipe + # identified as fifo:path + # + if fds.startswith('fifo:'): + path = fds[len('fifo:'):] + try: + self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) + self.writer = os.open(path, os.O_WRONLY) + except (OSError, IOError): + warn('unable to open jobserver pipe', path) + return + # + # Otherwise look for integer file-descriptor numbers. 
+ # + else: + split_fds = fds.split(',') + if len(split_fds) != 2: + warn('malformed jobserver file descriptors:', fds) + return + try: + self.reader = int(split_fds[0]) + self.writer = int(split_fds[1]) + except ValueError: + warn('non-integer jobserver file-descriptors:', fds) + return + try: + # + # Open a private copy of reader to avoid setting nonblocking + # on an unexpecting process with the same reader fd. + # + self.reader = os.open(f"/proc/self/fd/{self.reader}", + os.O_RDONLY | os.O_NONBLOCK) + except (IOError, OSError) as e: + warn('Unable to reopen jobserver read-side pipe:', repr(e)) + return + # + # OK, we have the channel to the job server; read out as many jobserver + # slots as possible. + # + while True: + try: + slot = os.read(self.reader, 8) + if not slot: + # + # Something went wrong. Clear self.jobs to avoid writing + # weirdness back to the jobserver and give up. + self.jobs = b"" + warn("unexpected empty token from jobserver;" + " possible invalid '--jobserver-auth=' setting") + self.claim = None + return + except (OSError, IOError) as e: + # + # If there is nothing more to read then we are done. + # + if e.errno == errno.EWOULDBLOCK: + break + # + # Anything else says that something went weird; give back + # the jobs and give up. + # + if self.jobs: + os.write(self.writer, self.jobs) + self.claim = None + warn('error reading from jobserver pipe', repr(e)) + return + self.jobs += slot + # + # Add a bump for our caller's reserveration, since we're just going + # to sit here blocked on our child. + # + self.claim = len(self.jobs) + 1 + + def close(self): + """Return all reserved slots to Jobserver.""" + + if not self.is_open: + return + + # Return all the reserved slots. 
+ if len(self.jobs): + os.write(self.writer, self.jobs) + + self.is_open = False + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.close() + + def run(self, cmd, *args, **pwargs): + """ + Run a command setting PARALLELISM env variable to the number of + available job slots (claim) + 1, e.g. it will reserve claim slots + to do the actual build work, plus one to monitor its children. + """ + self.open() # Ensure that self.claim is set + + # We can only claim parallelism if there was a jobserver (i.e. a + # top-level "-jN" argument) and there were no other failures. Otherwise + # leave out the environment variable and let the child figure out what + # is best. + if self.claim: + os.environ["PARALLELISM"] = str(self.claim) + + return subprocess.call(cmd, *args, **pwargs) diff --git a/tools/lib/python/kdoc/__init__.py b/tools/lib/python/kdoc/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/lib/python/kdoc/__init__.py diff --git a/tools/lib/python/kdoc/enrich_formatter.py b/tools/lib/python/kdoc/enrich_formatter.py new file mode 100644 index 000000000000..d1be4e5e1962 --- /dev/null +++ b/tools/lib/python/kdoc/enrich_formatter.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>. + +""" +Ancillary argparse HelpFormatter class that works on a similar way as +argparse.RawDescriptionHelpFormatter, e.g. description maintains line +breaks, but it also implement transformations to the help text. The +actual transformations ar given by enrich_text(), if the output is tty. + +Currently, the follow transformations are done: + + - Positional arguments are shown in upper cases; + - if output is TTY, ``var`` and positional arguments are shown prepended + by an ANSI SGR code. This is usually translated to bold. 
On some + terminals, like, konsole, this is translated into a colored bold text. +""" + +import argparse +import re +import sys + +class EnrichFormatter(argparse.HelpFormatter): + """ + Better format the output, making easier to identify the positional args + and how they're used at the __doc__ description. + """ + def __init__(self, *args, **kwargs): + """ + Initialize class and check if is TTY. + """ + super().__init__(*args, **kwargs) + self._tty = sys.stdout.isatty() + + def enrich_text(self, text): + r""" + Handle ReST markups (currently, only \`\`text\`\` markups). + """ + if self._tty and text: + # Replace ``text`` with ANSI SGR (bold) + return re.sub(r'\`\`(.+?)\`\`', + lambda m: f'\033[1m{m.group(1)}\033[0m', text) + return text + + def _fill_text(self, text, width, indent): + """ + Enrich descriptions with markups on it. + """ + enriched = self.enrich_text(text) + return "\n".join(indent + line for line in enriched.splitlines()) + + def _format_usage(self, usage, actions, groups, prefix): + """ + Enrich positional arguments at usage: line. + """ + + prog = self._prog + parts = [] + + for action in actions: + if action.option_strings: + opt = action.option_strings[0] + if action.nargs != 0: + opt += f" {action.dest.upper()}" + parts.append(f"[{opt}]") + else: + # Positional argument + parts.append(self.enrich_text(f"``{action.dest.upper()}``")) + + usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n" + return usage_text + + def _format_action_invocation(self, action): + """ + Enrich argument names. 
+ """ + if not action.option_strings: + return self.enrich_text(f"``{action.dest.upper()}``") + + return ", ".join(action.option_strings) diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py new file mode 100644 index 000000000000..022487ea2cc6 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=R0903,R0913,R0914,R0917 + +""" +Classes for navigating through the files that kernel-doc needs to handle +to generate documentation. +""" + +import argparse +import logging +import os +import re + +from kdoc.kdoc_parser import KernelDoc +from kdoc.kdoc_output import OutputFormat + + +class GlobSourceFiles: + """ + Parse C source code file names and directories via an Interactor. + """ + + def __init__(self, srctree=None, valid_extensions=None): + """ + Initialize valid extensions with a tuple. + + If not defined, assume default C extensions (.c and .h) + + It would be possible to use python's glob function, but it is + very slow, and it is not interactive. So, it would wait to read all + directories before actually do something. + + So, let's use our own implementation. + """ + + if not valid_extensions: + self.extensions = (".c", ".h") + else: + self.extensions = valid_extensions + + self.srctree = srctree + + def _parse_dir(self, dirname): + """Internal function to parse files recursively.""" + + with os.scandir(dirname) as obj: + for entry in obj: + name = os.path.join(dirname, entry.name) + + if entry.is_dir(follow_symlinks=False): + yield from self._parse_dir(name) + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if not basename.endswith(self.extensions): + continue + + yield name + + def parse_files(self, file_list, file_not_found_cb): + """ + Define an iterator to parse all source files from file_list, + handling directories if any. 
+ """ + + if not file_list: + return + + for fname in file_list: + if self.srctree: + f = os.path.join(self.srctree, fname) + else: + f = fname + + if os.path.isdir(f): + yield from self._parse_dir(f) + elif os.path.isfile(f): + yield f + elif file_not_found_cb: + file_not_found_cb(fname) + + +class KernelFiles(): + """ + Parse kernel-doc tags on multiple kernel source files. + + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and + ``EXPORT_SYMBOL*`` macros; + - self.process_export_file(): parses only ``EXPORT_SYMBOL*`` macros. + """ + + def warning(self, msg): + """Ancillary routine to output a warning and increment error count.""" + + self.config.log.warning(msg) + self.errors += 1 + + def error(self, msg): + """Ancillary routine to output an error and increment error count.""" + + self.config.log.error(msg) + self.errors += 1 + + def parse_file(self, fname): + """ + Parse a single Kernel source. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.files: + return + + doc = KernelDoc(self.config, fname) + export_table, entries = doc.parse_kdoc() + + self.export_table[fname] = export_table + + self.files.add(fname) + self.export_files.add(fname) # parse_kdoc() already check exports + + self.results[fname] = entries + + def process_export_file(self, fname): + """ + Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.export_files: + return + + doc = KernelDoc(self.config, fname) + export_table = doc.parse_export() + + if not export_table: + self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") + export_table = set() + + self.export_table[fname] = export_table + self.export_files.add(fname) + + def file_not_found_cb(self, fname): + """ + Callback to warn if a file was not found. 
+ """ + + self.error(f"Cannot find file {fname}") + + def __init__(self, verbose=False, out_style=None, + werror=False, wreturn=False, wshort_desc=False, + wcontents_before_sections=False, + logger=None): + """ + Initialize startup variables and parse all files. + """ + + if not verbose: + verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) + + if out_style is None: + out_style = OutputFormat() + + if not werror: + kcflags = os.environ.get("KCFLAGS", None) + if kcflags: + match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) + if match: + werror = True + + # reading this variable is for backwards compat just in case + # someone was calling it with the variable from outside the + # kernel's build system + kdoc_werror = os.environ.get("KDOC_WERROR", None) + if kdoc_werror: + werror = kdoc_werror + + # Some variables are global to the parser logic as a whole as they are + # used to send control configuration to KernelDoc class. As such, + # those variables are read-only inside the KernelDoc. + self.config = argparse.Namespace + + self.config.verbose = verbose + self.config.werror = werror + self.config.wreturn = wreturn + self.config.wshort_desc = wshort_desc + self.config.wcontents_before_sections = wcontents_before_sections + + if not logger: + self.config.log = logging.getLogger("kernel-doc") + else: + self.config.log = logger + + self.config.warning = self.warning + + self.config.src_tree = os.environ.get("SRCTREE", None) + + # Initialize variables that are internal to KernelFiles + + self.out_style = out_style + + self.errors = 0 + self.results = {} + + self.files = set() + self.export_files = set() + self.export_table = {} + + def parse(self, file_list, export_file=None): + """ + Parse all files. 
+ """ + + glob = GlobSourceFiles(srctree=self.config.src_tree) + + for fname in glob.parse_files(file_list, self.file_not_found_cb): + self.parse_file(fname) + + for fname in glob.parse_files(export_file, self.file_not_found_cb): + self.process_export_file(fname) + + def out_msg(self, fname, name, arg): + """ + Return output messages from a file name using the output style + filtering. + + If output type was not handled by the styler, return None. + """ + + # NOTE: we can add rules here to filter out unwanted parts, + # although OutputFormat.msg already does that. + + return self.out_style.msg(fname, name, arg) + + def msg(self, enable_lineno=False, export=False, internal=False, + symbol=None, nosymbol=None, no_doc_sections=False, + filenames=None, export_file=None): + """ + Interacts over the kernel-doc results and output messages, + returning kernel-doc markups on each interaction. + """ + + self.out_style.set_config(self.config) + + if not filenames: + filenames = sorted(self.results.keys()) + + glob = GlobSourceFiles(srctree=self.config.src_tree) + + for fname in filenames: + function_table = set() + + if internal or export: + if not export_file: + export_file = [fname] + + for f in glob.parse_files(export_file, self.file_not_found_cb): + function_table |= self.export_table[f] + + if symbol: + for s in symbol: + function_table.add(s) + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + + msg = "" + if fname not in self.results: + self.config.log.warning("No kernel-doc for file %s", fname) + continue + + symbols = self.results[fname] + self.out_style.set_symbols(symbols) + + for arg in symbols: + m = self.out_msg(fname, arg.name, arg) + + if m is None: + ln = arg.get("ln", 0) + dtype = arg.get('type', "") + + self.config.log.warning("%s:%d Can't handle %s", + fname, ln, dtype) + else: + msg += m + + if msg: + yield fname, msg diff --git a/tools/lib/python/kdoc/kdoc_item.py 
b/tools/lib/python/kdoc/kdoc_item.py new file mode 100644 index 000000000000..2b8a93f79716 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# A class that will, eventually, encapsulate all of the parsed data that we +# then pass into the output modules. +# + +""" +Data class to store a kernel-doc Item. +""" + +class KdocItem: + """ + A class that will, eventually, encapsulate all of the parsed data that we + then pass into the output modules. + """ + + def __init__(self, name, fname, type, start_line, **other_stuff): + self.name = name + self.fname = fname + self.type = type + self.declaration_start_line = start_line + self.sections = {} + self.sections_start_lines = {} + self.parameterlist = [] + self.parameterdesc_start_lines = [] + self.parameterdescs = {} + self.parametertypes = {} + # + # Just save everything else into our own dict so that the output + # side can grab it directly as before. As we move things into more + # structured data, this will, hopefully, fade away. + # + self.other_stuff = other_stuff + + def get(self, key, default = None): + """ + Get a value from optional keys. + """ + return self.other_stuff.get(key, default) + + def __getitem__(self, key): + return self.get(key) + + # + # Tracking of section and parameter information. + # + def set_sections(self, sections, start_lines): + """ + Set sections and start lines. + """ + self.sections = sections + self.section_start_lines = start_lines + + def set_params(self, names, descs, types, starts): + """ + Set parameter list: names, descriptions, types and start lines. 
+ """ + self.parameterlist = names + self.parameterdescs = descs + self.parametertypes = types + self.parameterdesc_start_lines = starts diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py new file mode 100644 index 000000000000..4210b91dde5f --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -0,0 +1,880 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 + +""" +Classes to implement output filters to print kernel-doc documentation. + +The implementation uses a virtual base class ``OutputFormat``. It +contains dispatches to virtual methods, and some code to filter +out output messages. + +The actual implementation is done on one separate class per each type +of output, e.g. ``RestFormat`` and ``ManFormat`` classes. + +Currently, there are output classes for ReST and man/troff. +""" + +import os +import re +from datetime import datetime + +from kdoc.kdoc_parser import KernelDoc, type_param +from kdoc.kdoc_re import KernRe + + +function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) + +# match expressions used to find embedded type information +type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) +type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) +type_func = KernRe(r"(\w+)\(\)", cache=False) +type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# Special RST handling for func ptr params +type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) + +# Special RST handling for structs with func ptr params +type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) + +type_env = KernRe(r"(\$\w+)", cache=False) +type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) +type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) +type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) +type_union = 
KernRe(r"\&(union\s*([_\w]+))", cache=False) +type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) +type_fallback = KernRe(r"\&([_\w]+)", cache=False) +type_member_func = type_member + KernRe(r"\(\)", cache=False) + + +class OutputFormat: + """ + Base class for OutputFormat. If used as-is, it means that only + warnings will be displayed. + """ + + # output mode. + OUTPUT_ALL = 0 #: Output all symbols and doc sections. + OUTPUT_INCLUDE = 1 #: Output only specified symbols. + OUTPUT_EXPORTED = 2 #: Output exported symbols. + OUTPUT_INTERNAL = 3 #: Output non-exported symbols. + + #: Highlights to be used in ReST format. + highlights = [] + + #: Blank line character. + blankline = "" + + def __init__(self): + """Declare internal vars and set mode to ``OUTPUT_ALL``.""" + + self.out_mode = self.OUTPUT_ALL + self.enable_lineno = None + self.nosymbol = {} + self.symbol = None + self.function_table = None + self.config = None + self.no_doc_sections = False + + self.data = "" + + def set_config(self, config): + """ + Setup global config variables used by both parser and output. + """ + + self.config = config + + def set_filter(self, export, internal, symbol, nosymbol, function_table, + enable_lineno, no_doc_sections): + """ + Initialize filter variables according to the requested mode. + + Only one choice is valid between export, internal and symbol. + + The nosymbol filter can be used on all modes. + """ + + self.enable_lineno = enable_lineno + self.no_doc_sections = no_doc_sections + self.function_table = function_table + + if symbol: + self.out_mode = self.OUTPUT_INCLUDE + elif export: + self.out_mode = self.OUTPUT_EXPORTED + elif internal: + self.out_mode = self.OUTPUT_INTERNAL + else: + self.out_mode = self.OUTPUT_ALL + + if nosymbol: + self.nosymbol = set(nosymbol) + + + def highlight_block(self, block): + """ + Apply the RST highlights to a sub-block of text. 
+ """ + + for r, sub in self.highlights: + block = r.sub(sub, block) + + return block + + def out_warnings(self, args): + """ + Output warnings for identifiers that will be displayed. + """ + + for log_msg in args.warnings: + self.config.warning(log_msg) + + def check_doc(self, name, args): + """Check if DOC should be output.""" + + if self.no_doc_sections: + return False + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode == self.OUTPUT_INCLUDE: + if name in self.function_table: + self.out_warnings(args) + return True + + return False + + def check_declaration(self, dtype, name, args): + """ + Checks if a declaration should be output or not based on the + filtering criteria. + """ + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: + if name in self.function_table: + return True + + if self.out_mode == self.OUTPUT_INTERNAL: + if dtype != "function": + self.out_warnings(args) + return True + + if name not in self.function_table: + self.out_warnings(args) + return True + + return False + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser. 
+ """ + + self.data = "" + + dtype = args.type + + if dtype == "doc": + self.out_doc(fname, name, args) + return self.data + + if not self.check_declaration(dtype, name, args): + return self.data + + if dtype == "function": + self.out_function(fname, name, args) + return self.data + + if dtype == "enum": + self.out_enum(fname, name, args) + return self.data + + if dtype == "var": + self.out_var(fname, name, args) + return self.data + + if dtype == "typedef": + self.out_typedef(fname, name, args) + return self.data + + if dtype in ["struct", "union"]: + self.out_struct(fname, name, args) + return self.data + + # Warn if some type requires an output logic + self.config.log.warning("doesn't know how to output '%s' block", + dtype) + + return None + + # Virtual methods to be overridden by inherited classes + # At the base class, those do nothing. + def set_symbols(self, symbols): + """Get a list of all symbols from kernel_doc.""" + + def out_doc(self, fname, name, args): + """Outputs a DOC block.""" + + def out_function(self, fname, name, args): + """Outputs a function.""" + + def out_enum(self, fname, name, args): + """Outputs an enum.""" + + def out_var(self, fname, name, args): + """Outputs a variable.""" + + def out_typedef(self, fname, name, args): + """Outputs a typedef.""" + + def out_struct(self, fname, name, args): + """Outputs a struct.""" + + +class RestFormat(OutputFormat): + """Consts and functions used by ReST output.""" + + #: Highlights to be used in ReST format + highlights = [ + (type_constant, r"``\1``"), + (type_constant2, r"``\1``"), + + # Note: need to escape () to avoid func matching later + (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), + (type_member, r":c:type:`\1\2\3 <\1>`"), + (type_fp_param, r"**\1\\(\\)**"), + (type_fp_param2, r"**\1\\(\\)**"), + (type_func, r"\1()"), + (type_enum, r":c:type:`\1 <\2>`"), + (type_struct, r":c:type:`\1 <\2>`"), + (type_typedef, r":c:type:`\1 <\2>`"), + (type_union, r":c:type:`\1 <\2>`"), + + # in rst 
this can refer to any type + (type_fallback, r":c:type:`\1`"), + (type_param_ref, r"**\1\2**") + ] + + blankline = "\n" + + #: Sphinx literal block regex. + sphinx_literal = KernRe(r'^[^.].*::$', cache=False) + + #: Sphinx code block regex. + sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) + + def __init__(self): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.lineprefix = "" + + def print_lineno(self, ln): + """Outputs a line number.""" + + if self.enable_lineno and ln is not None: + ln += 1 + self.data += f".. LINENO {ln}\n" + + def output_highlight(self, args): + """ + Outputs a C symbol that may require being converted to ReST using + the self.highlights variable. + """ + + input_text = args + output = "" + in_literal = False + litprefix = "" + block = "" + + for line in input_text.strip("\n").split("\n"): + + # If we're in a literal block, see if we should drop out of it. + # Otherwise, pass the line straight through unmunged. + if in_literal: + if line.strip(): # If the line is not blank + # If this is the first non-blank line in a literal block, + # figure out the proper indent. 
+ if not litprefix: + r = KernRe(r'^(\s*)') + if r.match(line): + litprefix = '^' + r.group(1) + else: + litprefix = "" + + output += line + "\n" + elif not KernRe(litprefix).match(line): + in_literal = False + else: + output += line + "\n" + else: + output += line + "\n" + + # Not in a literal block (or just dropped out) + if not in_literal: + block += line + "\n" + if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line): + in_literal = True + litprefix = "" + output += self.highlight_block(block) + block = "" + + # Handle any remaining block + if block: + output += self.highlight_block(block) + + # Print the output with the line prefix + for line in output.strip("\n").split("\n"): + self.data += self.lineprefix + line + "\n" + + def out_section(self, args, out_docblock=False): + """ + Outputs a block section. + + This could use some work; it's used to output the DOC: sections, and + starts by putting out the name of the doc section itself, but that + tends to duplicate a header already in the template file. + """ + for section, text in args.sections.items(): + # Skip sections that are in the nosymbol_table + if section in self.nosymbol: + continue + + if out_docblock: + if not self.out_mode == self.OUTPUT_INCLUDE: + self.data += f".. 
_{section}:\n\n" + self.data += f'{self.lineprefix}**{section}**\n\n' + else: + self.data += f'{self.lineprefix}**{section}**\n\n' + + self.print_lineno(args.section_start_lines.get(section, 0)) + self.output_highlight(text) + self.data += "\n" + self.data += "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + self.out_section(args, out_docblock=True) + + def out_function(self, fname, name, args): + + oldprefix = self.lineprefix + signature = "" + + func_macro = args.get('func_macro', False) + if func_macro: + signature = name + else: + if args.get('functiontype'): + signature = args['functiontype'] + " " + signature += name + " (" + + ln = args.declaration_start_line + count = 0 + for parameter in args.parameterlist: + if count != 0: + signature += ", " + count += 1 + dtype = args.parametertypes.get(parameter, "") + + if function_pointer.search(dtype): + signature += function_pointer.group(1) + parameter + function_pointer.group(3) + else: + signature += dtype + + if not func_macro: + signature += ")" + + self.print_lineno(ln) + if args.get('typedef') or not args.get('functiontype'): + self.data += f".. c:macro:: {name}\n\n" + + if args.get('typedef'): + self.data += " **Typedef**: " + self.lineprefix = "" + self.output_highlight(args.get('purpose', "")) + self.data += "\n\n**Syntax**\n\n" + self.data += f" ``{signature}``\n\n" + else: + self.data += f"``{signature}``\n\n" + else: + self.data += f".. c:function:: {signature}\n\n" + + if not args.get('typedef'): + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', "")) + self.data += "\n" + + # Put descriptive text into a container (HTML <div>) to help set + # function prototypes apart + self.lineprefix = " " + + if args.parameterlist: + self.data += ".. 
container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Parameters**\n\n" + + for parameter in args.parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + dtype = args.parametertypes.get(parameter, "") + + if dtype: + self.data += f"{self.lineprefix}``{dtype}``\n" + else: + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) + + self.lineprefix = " " + if parameter_name in args.parameterdescs and \ + args.parameterdescs[parameter_name] != KernelDoc.undescribed: + + self.output_highlight(args.parameterdescs[parameter_name]) + self.data += "\n" + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.lineprefix = " " + + self.out_section(args) + self.lineprefix = oldprefix + + def out_enum(self, fname, name, args): + + oldprefix = self.lineprefix + ln = args.declaration_start_line + + self.data += f"\n\n.. c:enum:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', '')) + self.data += "\n" + + self.data += ".. container:: kernelindent\n\n" + outer = self.lineprefix + " " + self.lineprefix = outer + " " + self.data += f"{outer}**Constants**\n\n" + + for parameter in args.parameterlist: + self.data += f"{outer}``{parameter}``\n" + + if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed: + self.output_highlight(args.parameterdescs[parameter]) + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_var(self, fname, name, args): + oldprefix = self.lineprefix + ln = args.declaration_start_line + full_proto = args.other_stuff["full_proto"] + + self.lineprefix = " " + + self.data += f"\n\n.. 
c:macro:: {name}\n\n{self.lineprefix}``{full_proto}``\n\n" + + self.print_lineno(ln) + self.output_highlight(args.get('purpose', '')) + self.data += "\n" + + if args.other_stuff["default_val"]: + self.data += f'{self.lineprefix}**Initialization**\n\n' + self.output_highlight(f'default: ``{args.other_stuff["default_val"]}``') + + self.out_section(args) + + def out_typedef(self, fname, name, args): + + oldprefix = self.lineprefix + ln = args.declaration_start_line + + self.data += f"\n\n.. c:type:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + + self.output_highlight(args.get('purpose', '')) + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_struct(self, fname, name, args): + + purpose = args.get('purpose', "") + declaration = args.get('definition', "") + dtype = args.type + ln = args.declaration_start_line + + self.data += f"\n\n.. c:{dtype}:: {name}\n\n" + + self.print_lineno(ln) + + oldprefix = self.lineprefix + self.lineprefix += " " + + self.output_highlight(purpose) + self.data += "\n" + + self.data += ".. 
container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Definition**::\n\n" + + self.lineprefix = self.lineprefix + " " + + declaration = declaration.replace("\t", self.lineprefix) + + self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n" + self.data += f"{declaration}{self.lineprefix}" + "};\n\n" + + self.lineprefix = " " + self.data += f"{self.lineprefix}**Members**\n\n" + for parameter in args.parameterlist: + if not parameter or parameter.startswith("#"): + continue + + parameter_name = parameter.split("[", maxsplit=1)[0] + + if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) + + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.lineprefix = " " + self.output_highlight(args.parameterdescs[parameter_name]) + self.lineprefix = " " + + self.data += "\n" + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + +class ManFormat(OutputFormat): + """Consts and functions used by man pages output.""" + + highlights = ( + (type_constant, r"\1"), + (type_constant2, r"\1"), + (type_func, r"\\fB\1\\fP"), + (type_enum, r"\\fI\1\\fP"), + (type_struct, r"\\fI\1\\fP"), + (type_typedef, r"\\fI\1\\fP"), + (type_union, r"\\fI\1\\fP"), + (type_param, r"\\fI\1\\fP"), + (type_param_ref, r"\\fI\1\2\\fP"), + (type_member, r"\\fI\1\2\3\\fP"), + (type_fallback, r"\\fI\1\\fP") + ) + blankline = "" + + #: Allowed timestamp formats. + date_formats = [ + "%a %b %d %H:%M:%S %Z %Y", + "%a %b %d %H:%M:%S %Y", + "%Y-%m-%d", + "%b %d %Y", + "%B %d %Y", + "%m %d %Y", + ] + + def __init__(self, modulename): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. 
+ """ + + super().__init__() + self.modulename = modulename + self.symbols = [] + + dt = None + tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") + if tstamp: + for fmt in self.date_formats: + try: + dt = datetime.strptime(tstamp, fmt) + break + except ValueError: + pass + + if not dt: + dt = datetime.now() + + self.man_date = dt.strftime("%B %Y") + + def arg_name(self, args, name): + """ + Return the name that will be used for the man page. + + As we may have the same name on different namespaces, + prepend the data type for all types except functions and typedefs. + + The doc section is special: it uses the modulename. + """ + + dtype = args.type + + if dtype == "doc": + return self.modulename + + if dtype in ["function", "typedef"]: + return name + + return f"{dtype} {name}" + + def set_symbols(self, symbols): + """ + Get a list of all symbols from kernel_doc. + + Man pages will uses it to add a SEE ALSO section with other + symbols at the same file. + """ + self.symbols = symbols + + def out_tail(self, fname, name, args): + """Adds a tail for all man pages.""" + + # SEE ALSO section + self.data += f'.SH "SEE ALSO"' + "\n.PP\n" + self.data += (f"Kernel file \\fB{args.fname}\\fR\n") + if len(self.symbols) >= 2: + cur_name = self.arg_name(args, name) + + related = [] + for arg in self.symbols: + out_name = self.arg_name(arg, arg.name) + + if cur_name == out_name: + continue + + related.append(f"\\fB{out_name}\\fR(9)") + + self.data += ",\n".join(related) + "\n" + + # TODO: does it make sense to add other sections? Maybe + # REPORTING ISSUES? LICENSE? + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser. + + Add a tail at the end of man pages output. + """ + super().msg(fname, name, args) + self.out_tail(fname, name, args) + + return self.data + + def output_highlight(self, block): + """ + Outputs a C symbol that may require being highlighted with + self.highlights variable using troff syntax. 
+ """ + + contents = self.highlight_block(block) + + if isinstance(contents, list): + contents = "\n".join(contents) + + for line in contents.strip("\n").split("\n"): + line = KernRe(r"^\s*").sub("", line) + if not line: + continue + + if line[0] == ".": + self.data += "\\&" + line + "\n" + else: + self.data += line + "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_function(self, fname, name, args): + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + if args.get('functiontype', ''): + self.data += f'.B "{args["functiontype"]}" {name}' + "\n" + else: + self.data += f'.B "{name}' + "\n" + + count = 0 + parenth = "(" + post = "," + + for parameter in args.parameterlist: + if count == len(args.parameterlist) - 1: + post = ");" + + dtype = args.parametertypes.get(parameter, "") + if function_pointer.match(dtype): + # Pointer-to-function + self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" + else: + dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) + + self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" + count += 1 + parenth = "" + + if args.parameterlist: + self.data += ".SH ARGUMENTS\n" + + for parameter in args.parameterlist: + parameter_name = re.sub(r'\[.*', '', parameter) + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section.upper()}"' + "\n" + 
self.output_highlight(text) + + def out_enum(self, fname, name, args): + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"enum {name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + self.data += f"enum {name}" + " {\n" + + count = 0 + for parameter in args.parameterlist: + self.data += f'.br\n.BI " {parameter}"' + "\n" + if count == len(args.parameterlist) - 1: + self.data += "\n};\n" + else: + self.data += ", \n.br\n" + + count += 1 + + self.data += ".SH Constants\n" + + for parameter in args.parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_var(self, fname, name, args): + out_name = self.arg_name(args, name) + full_proto = args.other_stuff["full_proto"] + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + self.data += f"{full_proto}\n" + + if args.other_stuff["default_val"]: + self.data += f'.SH "Initialization"' + "\n" + self.output_highlight(f'default: {args.other_stuff["default_val"]}') + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_typedef(self, fname, name, args): + module = self.modulename + purpose = args.get('purpose') + out_name = self.arg_name(args, name) + + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"typedef {name} \\- {purpose}\n" + + for section, text in args.sections.items(): + self.data += f'.SH 
"{section}"' + "\n" + self.output_highlight(text) + + def out_struct(self, fname, name, args): + module = self.modulename + purpose = args.get('purpose') + definition = args.get('definition') + out_name = self.arg_name(args, name) + + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{args.type} {name} \\- {purpose}\n" + + # Replace tabs with two spaces and handle newlines + declaration = definition.replace("\t", " ") + declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) + + self.data += ".SH SYNOPSIS\n" + self.data += f"{args.type} {name} " + "{" + "\n.br\n" + self.data += f'.BI "{declaration}\n' + "};\n.br\n\n" + + self.data += ".SH Members\n" + for parameter in args.parameterlist: + if parameter.startswith("#"): + continue + + parameter_name = re.sub(r"\[.*", "", parameter) + + if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name)) + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py new file mode 100644 index 000000000000..ca00695b47b3 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -0,0 +1,1784 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 + +""" +Classes and functions related to reading a C language source or header FILE +and extract embedded documentation comments from it. +""" + +import sys +import re +from pprint import pformat + +from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_item import KdocItem + +# +# Regular expressions used to parse kernel-doc markups at KernelDoc class. 
+# +# Let's declare them in lowercase outside any class to make it easier to +# convert from the Perl script. +# +# As those are evaluated at the beginning, no need to cache them +# + +# Allow whitespace at end of comment start. +doc_start = KernRe(r'^/\*\*\s*$', cache=False) + +doc_end = KernRe(r'\*/', cache=False) +doc_com = KernRe(r'\s*\*\s*', cache=False) +doc_com_body = KernRe(r'\s*\* ?', cache=False) +doc_decl = doc_com + KernRe(r'(\w+)', cache=False) + +# @params and a strictly limited set of supported section names +# Specifically: +# Match @word: +# @...: +# @{section-name}: +# while trying to not match literal block starts like "example::" +# +known_section_names = 'description|context|returns?|notes?|examples?' +known_sections = KernRe(known_section_names, flags = re.I) +doc_sect = doc_com + \ + KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', + flags=re.I, cache=False) + +doc_content = doc_com_body + KernRe(r'(.*)', cache=False) +doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False) + +export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) + +type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# +# Tests for the beginning of a kerneldoc block in its various forms. +# +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False) +doc_begin_func = KernRe(str(doc_com) + # initial " * ' + r"(?:\w+\s*\*\s*)?" + # type (not captured) + r'(?:define\s+)?' 
+ # possible "define" (not captured) + r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" + r'(?:[-:].*)?$', # description (not captured) + cache = False) + +# +# Here begins a long set of transformations to turn structure member prefixes +# and macro invocations into something we can parse and generate kdoc for. +# +struct_args_pattern = r'([^,)]+)' + +struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*__private', re.S), ' '), + (KernRe(r'\s*__rcu', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. 
+ # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parentheses will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end delimiter matching +# the start delimiter. 
Yet, right now, only one replace group +# is allowed. +# +struct_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + +# +# Transforms for function prototypes +# +function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__exit +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__always_unused *"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), +] + +# +# Ancillary functions +# + +def apply_transforms(xforms, text): + """ + Apply a set of transforms to a block of text. + """ + for search, subst in xforms: + text = search.sub(subst, text) + return text + +multi_space = KernRe(r'\s\s+') +def trim_whitespace(s): + """ + A little helper to get rid of excess white space. + """ + return multi_space.sub(' ', s.strip()) + +def trim_private_members(text): + """ + Remove ``struct``/``enum`` members that have been marked "private". + """ + # First look for a "public:" block that ends a private region, then + # handle the "private until the end" case. 
class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    An entry accumulates the text of the section currently being parsed,
    the finished sections and parameter descriptions, and the warnings
    produced while parsing it.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry for a comment found in ``fname``.

        The declaration start line is recorded as ``ln + 1``.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        # Name and start line of the section being accumulated. They are
        # initialized here so that dump_section() can be called safely
        # even when begin_section() has not run yet.
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """Returns a string with all content texts that were added."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Informational messages (``warning=False``) are logged right away
        through ``config.log.info()``. Warnings are only queued in
        ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output
        self.warnings.append(log_msg)

    def begin_section(self, line_no, title=SECTION_DEFAULT, dump=False):
        """
        Begin a new section.

        When ``dump`` is true, the section accumulated so far is stored
        first. ``title`` becomes the current section name and ``line_no``
        its start line.
        """
        if dump:
            self.dump_section(start_new=True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections whose name matches ``type_param`` are stored as parameter
        descriptions; all others go to ``self.sections``. When
        ``start_new`` is true, the accumulated text is cleared and the
        current section is reset to the default one.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []
def emit_msg(self, ln, msg, *, warning=True):
    """
    Emit a message.

    While an entry is being processed, the message is handed to that
    entry's own ``emit_msg()``. Otherwise it is written to the config
    logger, prefixed with the file name and line number.
    """
    entry = self.entry
    if entry:
        entry.emit_msg(ln, msg, warning=warning)
        return

    text = f"{self.fname}:{ln} {msg}"
    logger = self.config.log

    # Pick the log level from the "warning" flag.
    report = logger.warning if warning else logger.info
    report(text)
def emit_unused_warnings(self):
    """
    Flush the warnings of an entry that never became an output item.

    When the parser fails to produce a valid entry, the warnings stored
    under ``entry.warnings`` would be discarded when the state is reset.
    This makes sure they are reported instead.

    .. note::

       The warnings are sent through ``config.warning()``, so they are
       not filtered by the ``-W`` parameters: they are all produced even
       when ``-Wreturn``, ``-Wshort-desc``, and/or
       ``-Wcontents-before-sections`` are used.

       Filtering them would mean buffering them and applying the filter
       during the output step, depending on the selected symbols.
    """
    pending = self.entry

    # Nothing to do without an entry, or when it is already in the list.
    if not pending or pending in self.entries:
        return

    for text in pending.warnings:
        self.config.warning(text)
+ """ + + if self.entry.anon_struct_union and dtype == "" and param == "}": + return # Ignore the ending }; from anonymous struct/union + + self.entry.anon_struct_union = False + + param = KernRe(r'[\[\)].*').sub('', param, count=1) + + # + # Look at various "anonymous type" cases. + # + if dtype == '': + if param.endswith("..."): + if len(param) > 3: # there is a name provided, use that + param = param[:-3] + if not self.entry.parameterdescs.get(param): + self.entry.parameterdescs[param] = "variable arguments" + + elif (not param) or param == "void": + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True + + # Warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements) + if param not in self.entry.parameterdescs and not param.startswith("#"): + self.entry.parameterdescs[param] = self.undescribed + + if "." not in param: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_msg(ln, + f"{dname} '{param}' not described in '{declaration_name}'") + + # Strip spaces from param so that it is one continuous string on + # parameterlist. This fixes a problem where check_sections() + # cannot find a parameter like "addr[6 + 2]" because it actually + # appears as "addr[6", "+", "2]" on the parameter list. + # However, it's better to maintain the param string unchanged for + # output, so just weaken the string compare in check_sections() + # to ignore "[blah" in a parameter string. 
+ + self.entry.parameterlist.append(param) + org_arg = KernRe(r'\s\s+').sub(' ', org_arg) + self.entry.parametertypes[param] = org_arg + + + def create_parameter_list(self, ln, decl_type, args, + splitter, declaration_name): + """ + Creates a list of parameters, storing them at self.entry. + """ + + # temporarily replace all commas inside function pointer definition + arg_expr = KernRe(r'(\([^\),]+),') + while arg_expr.search(args): + args = arg_expr.sub(r"\1#", args) + + for arg in args.split(splitter): + # Ignore argument attributes + arg = KernRe(r'\sPOS0?\s').sub(' ', arg) + + # Strip leading/trailing spaces + arg = arg.strip() + arg = KernRe(r'\s+').sub(' ', arg, count=1) + + if arg.startswith('#'): + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + + # Treat preprocessor directive as a typeless variable + self.push_parameter(ln, decl_type, arg, "", + "", declaration_name) + # + # The pointer-to-function case. + # + elif KernRe(r'\(.+\)\s*\(').search(arg): + arg = arg.replace('#', ',') + r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" + r'([\w\[\].]*)' # Capture the name and possible [array] + r'\s*\)') # Make sure the trailing ")" is there + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + # + # The array-of-pointers case. Dig the parameter name out from the middle + # of the declaration. 
+ # + elif KernRe(r'\(.+\)\s*\[').search(arg): + r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" + r'([\w.]*?)' # The actual pointer name + r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + elif arg: + # + # Clean up extraneous spaces and split the string at commas; the first + # element of the resulting list will also include the type information. + # + arg = KernRe(r'\s*:\s*').sub(":", arg) + arg = KernRe(r'\s*\[').sub('[', arg) + args = KernRe(r'\s*,\s*').split(arg) + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + # + # args[0] has a string of "type a". If "a" includes an [array] + # declaration, we want to not be fooled by any white space inside + # the brackets, so detect and handle that case specially. + # + r = KernRe(r'^([^[\]]*\s+)(.*)$') + if r.match(args[0]): + args[0] = r.group(2) + dtype = r.group(1) + else: + # No space in args[0]; this seems wrong but preserves previous behavior + dtype = '' + + bitfield_re = KernRe(r'(.*?):(\w+)') + for param in args: + # + # For pointers, shift the star(s) from the variable name to the + # type declaration. + # + r = KernRe(r'^(\*+)\s*(.*)') + if r.match(param): + self.push_parameter(ln, decl_type, r.group(2), + f"{dtype} {r.group(1)}", + arg, declaration_name) + # + # Perform a similar shift for bitfields. + # + elif bitfield_re.search(param): + if dtype != "": # Skip unnamed bit-fields + self.push_parameter(ln, decl_type, bitfield_re.group(1), + f"{dtype}:{bitfield_re.group(2)}", + arg, declaration_name) + else: + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + def check_sections(self, ln, decl_name, decl_type): + """ + Check for errors inside sections, emitting warnings if not found + parameters are described. 
def check_return_section(self, ln, declaration_name, return_type):
    """
    Warn when a value-returning function lacks a "Return" section.

    Nothing is done unless ``self.config.wreturn`` is set. An empty
    ``return_type`` (a macro) and a ``void`` return type (but not
    ``void *``) are skipped as well.
    """
    # The check is optional, and macros have no return type at all.
    if not self.config.wreturn or not return_type:
        return

    # Ignore functions with a "void" return type (but not "void *")
    returns_void = KernRe(r'void\s*\w*\s*$').search(return_type)
    if returns_void:
        return

    described = self.entry.sections.get("Return", None)
    if described:
        return

    self.emit_msg(ln,
                  f"No description found for return value of '{declaration_name}'")
Among other things, this function will turn a member like:: + + struct { inner_members; } foo; + + into:: + + struct foo; inner_members; + """ + + # + # The trick is in the ``^{`` below - it prevents a match of an outer + # ``struct``/``union`` until the inner one has been munged + # (removing the ``{`` in the process). + # + struct_members = KernRe(r'(struct|union)' # 0: declaration type + r'([^\{\};]+)' # 1: possible name + r'(\{)' + r'([^\{\}]*)' # 3: Contents of declaration + r'(\})' + r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration + tuples = struct_members.findall(members) + while tuples: + for t in tuples: + newmember = "" + oldmember = "".join(t) # Reconstruct the original formatting + dtype, name, lbr, content, rbr, rest, semi = t + # + # Pass through each field name, normalizing the form and formatting. + # + for s_id in rest.split(','): + s_id = s_id.strip() + newmember += f"{dtype} {s_id}; " + # + # Remove bitfield/array/pointer info, getting the bare name. + # + s_id = KernRe(r'[:\[].*').sub('', s_id) + s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + # + # Pass through the members of this inner structure/union. + # + for arg in content.split(';'): + arg = arg.strip() + # + # Look for (type)(*name)(args) - pointer to function + # + r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') + if r.match(arg): + dtype, name, extra = r.group(1), r.group(2), r.group(3) + # Pointer-to-function + if not s_id: + # Anonymous struct/union + newmember += f"{dtype}{name}{extra}; " + else: + newmember += f"{dtype}{s_id}.{name}{extra}; " + # + # Otherwise a non-function member. 
+ # + else: + # + # Remove bitmap and array portions and spaces around commas + # + arg = KernRe(r':\s*\d+\s*').sub('', arg) + arg = KernRe(r'\[.*\]').sub('', arg) + arg = KernRe(r'\s*,\s*').sub(',', arg) + # + # Look for a normal decl - "type name[,name...]" + # + r = KernRe(r'(.*)\s+([\S+,]+)') + if r.search(arg): + for name in r.group(2).split(','): + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) + if not s_id: + # Anonymous struct/union + newmember += f"{r.group(1)} {name}; " + else: + newmember += f"{r.group(1)} {s_id}.{name}; " + else: + newmember += f"{arg}; " + # + # At the end of the s_id loop, replace the original declaration with + # the munged version. + # + members = members.replace(oldmember, newmember) + # + # End of the tuple loop - search again and see if there are outer members + # that now turn up. + # + tuples = struct_members.findall(members) + return members + + def format_struct_decl(self, declaration): + """ + Format the ``struct`` declaration into a standard form for inclusion + in the resulting docs. + """ + + # + # Insert newlines, get rid of extra spaces. + # + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) + # + # Format inline enums with each member on its own line. + # + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') + while r.search(declaration): + declaration = r.sub(r'\1,\n\2', declaration) + # + # Now go through and supply the right number of tabs + # for each line. 
+ # + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) + if clause: + if '}' in clause and level > 1: + level -= 1 + if not clause.startswith('#'): + declaration += "\t" * level + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + return declaration + + + def dump_struct(self, ln, proto): + """ + Store an entry for a ``struct`` or ``union`` + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: + self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") + return + decl_type, declaration_name, members = struct_parts + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") + return + # + # Go through the list of members applying all of our transformations. + # + members = trim_private_members(members) + members = apply_transforms(struct_xforms, members) + + nested = NestedMatch() + for search, sub in struct_nested_prefixes: + members = nested.sub(search, sub, members) + # + # Deal with embedded struct and union members, and drop enums entirely. + # + declaration = members + members = self.rewrite_struct_members(members) + members = re.sub(r'(\{[^\{\}]*\})', '', members) + # + # Output the result and we are done. + # + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type) + self.output_declaration(decl_type, declaration_name, + definition=self.format_struct_decl(declaration), + purpose=self.entry.declaration_purpose) + + def dump_enum(self, ln, proto): + """ + Store an ``enum`` inside self.entries array. + """ + # + # Strip preprocessor directives. 
Note that this depends on the + # trailing semicolon we added in process_proto_type(). + # + proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + # + # Parse out the name and members of the enum. Typedef form first. + # + r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + if r.search(proto): + declaration_name = r.group(2) + members = trim_private_members(r.group(1)) + # + # Failing that, look for a straight enum + # + else: + r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') + if r.match(proto): + declaration_name = r.group(1) + members = trim_private_members(r.group(2)) + # + # OK, this isn't going to work. + # + else: + self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") + return + # + # Make sure we found what we were expecting. + # + if self.entry.identifier != declaration_name: + if self.entry.identifier == "": + self.emit_msg(ln, + f"{proto}: wrong kernel-doc identifier on prototype") + else: + self.emit_msg(ln, + f"expecting prototype for enum {self.entry.identifier}. " + f"Prototype was for enum {declaration_name} instead") + return + + if not declaration_name: + declaration_name = "(anonymous)" + # + # Parse out the name of each enum member, and verify that we + # have a description for it. + # + member_set = set() + members = KernRe(r'\([^;)]*\)').sub('', members) + for arg in members.split(','): + if not arg: + continue + arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + self.entry.parameterlist.append(arg) + if arg not in self.entry.parameterdescs: + self.entry.parameterdescs[arg] = self.undescribed + self.emit_msg(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") + member_set.add(arg) + # + # Ensure that every described member actually exists in the enum. 
def dump_declaration(self, ln, prototype):
    """
    Store a data declaration inside self.entries array.

    Dispatches on ``self.entry.decl_type`` to the matching ``dump_*``
    method, passing ``ln`` and ``prototype`` through unchanged. An
    unrecognized type is reported through ``emit_msg()``.
    """
    decl_type = self.entry.decl_type

    if decl_type == "enum":
        self.dump_enum(ln, prototype)
    elif decl_type == "typedef":
        self.dump_typedef(ln, prototype)
    elif decl_type in ["union", "struct"]:
        self.dump_struct(ln, prototype)
    elif decl_type == "var":
        self.dump_var(ln, prototype)
    else:
        # This would be a bug. Report it with emit_msg(): this class has
        # no emit_message() method, so calling that name would raise
        # AttributeError instead of producing the diagnostic.
        self.emit_msg(ln, f'Unknown declaration type: {decl_type}')
+ type2 = r'(?:[\w\s]+\*+)+' + # + # Attempt to match first on (args) with no internal parentheses; this + # lets us easily filter out __acquires() and other post-args stuff. If + # that fails, just grab the rest of the line to the last closing + # parenthesis. + # + proto_args = r'\(([^\(]*|.*)\)' + # + # (Except for the simple macro case) attempt to split up the prototype + # in the various ways we understand. + # + if not found: + patterns = [ + rf'^()({name})\s*{proto_args}', + rf'^({type1})\s+({name})\s*{proto_args}', + rf'^({type2})\s*({name})\s*{proto_args}', + ] + + for p in patterns: + r = KernRe(p) + if r.match(prototype): + return_type = r.group(1) + declaration_name = r.group(2) + args = r.group(3) + self.create_parameter_list(ln, decl_type, args, ',', + declaration_name) + found = True + break + # + # Parsing done; make sure that things are as we expect. + # + if not found: + self.emit_msg(ln, + f"cannot understand function prototype: '{prototype}'") + return + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " + f"Prototype was for {declaration_name}() instead") + return + self.check_sections(ln, declaration_name, "function") + self.check_return_section(ln, declaration_name, return_type) + # + # Store the result. + # + self.output_declaration(decl_type, declaration_name, + typedef=('typedef' in return_type), + functiontype=return_type, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + + + def dump_typedef(self, ln, proto): + """ + Store a ``typedef`` inside self.entries array. + """ + # + # We start by looking for function typedefs. 
+ # + typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + typedef_ident = r'\*?\s*(\w\S+)\s*' + typedef_args = r'\s*\((.*)\);' + + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) + + # Parse function typedef prototypes + for r in [typedef1, typedef2]: + if not r.match(proto): + continue + + return_type = r.group(1).strip() + declaration_name = r.group(2) + args = r.group(3) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.create_parameter_list(ln, 'function', args, ',', declaration_name) + + self.output_declaration('function', declaration_name, + typedef=True, + functiontype=return_type, + purpose=self.entry.declaration_purpose) + return + # + # Not a function, try to parse a simple typedef. + # + r = KernRe(r'typedef.*\s+(\w+)\s*;') + if r.match(proto): + declaration_name = r.group(1) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.output_declaration('typedef', declaration_name, + purpose=self.entry.declaration_purpose) + return + + self.emit_msg(ln, "error: Cannot parse typedef!") + + @staticmethod + def process_export(function_set, line): + """ + process ``EXPORT_SYMBOL*`` tags + + This method doesn't use any variable from the class, so declare it + with a staticmethod decorator. + """ + + # We support documenting some exported symbols with different + # names. A horrible hack. + suffixes = [ '_noprof' ] + + # Note: it accepts only one EXPORT_SYMBOL* per line, as having + # multiple export lines would violate Kernel coding style. 
+ + if export_symbol.search(line): + symbol = export_symbol.group(2) + elif export_symbol_ns.search(line): + symbol = export_symbol_ns.group(2) + else: + return False + # + # Found an export, trim out any special suffixes + # + for suffix in suffixes: + # Be backward compatible with Python < 3.9 + if symbol.endswith(suffix): + symbol = symbol[:-len(suffix)] + function_set.add(symbol) + return True + + def process_normal(self, ln, line): + """ + STATE_NORMAL: looking for the ``/**`` to begin everything. + """ + + if not doc_start.match(line): + return + + # start a new entry + self.reset_state(ln) + + # next line is always the function name + self.state = state.NAME + + def process_name(self, ln, line): + """ + STATE_NAME: Looking for the "name - description" line + """ + # + # Check for a DOC: block and handle them specially. + # + if doc_block.search(line): + + if not doc_block.group(1): + self.entry.begin_section(ln, "Introduction") + else: + self.entry.begin_section(ln, doc_block.group(1)) + + self.entry.identifier = self.entry.section + self.state = state.DOCBLOCK + # + # Otherwise we're looking for a normal kerneldoc declaration line. + # + elif doc_decl.search(line): + self.entry.identifier = doc_decl.group(1) + + # Test for data declaration + if doc_begin_data.search(line): + self.entry.decl_type = doc_begin_data.group(1) + self.entry.identifier = doc_begin_data.group(2) + # + # Look for a function description + # + elif doc_begin_func.search(line): + self.entry.identifier = doc_begin_func.group(1) + self.entry.decl_type = "function" + # + # We struck out. + # + else: + self.emit_msg(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = state.NORMAL + return + # + # OK, set up for a new kerneldoc entry. 
+ # + self.state = state.BODY + self.entry.identifier = self.entry.identifier.strip(" ") + # if there's no @param blocks need to set up default section here + self.entry.begin_section(ln + 1) + # + # Find the description portion, which *should* be there but + # isn't always. + # (We should be able to capture this from the previous parsing - someday) + # + r = KernRe("[-:](.*)") + if r.search(line): + self.entry.declaration_purpose = trim_whitespace(r.group(1)) + self.state = state.DECLARATION + else: + self.entry.declaration_purpose = "" + + if not self.entry.declaration_purpose and self.config.wshort_desc: + self.emit_msg(ln, + f"missing initial short description on line:\n{line}") + + if not self.entry.identifier and self.entry.decl_type != "enum": + self.emit_msg(ln, + f"wrong kernel-doc identifier on line:\n{line}") + self.state = state.NORMAL + + if self.config.verbose: + self.emit_msg(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + warning=False) + # + # Failed to find an identifier. Emit a warning + # + else: + self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") + + def is_new_section(self, ln, line): + """ + Helper function to determine if a new section is being started. + """ + if doc_sect.search(line): + self.state = state.BODY + # + # Pick out the name of our new section, tweaking it if need be. + # + newsection = doc_sect.group(1) + if newsection.lower() == 'description': + newsection = 'Description' + elif newsection.lower() == 'context': + newsection = 'Context' + self.state = state.SPECIAL_SECTION + elif newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + self.state = state.SPECIAL_SECTION + elif newsection[0] == '@': + self.state = state.SPECIAL_SECTION + # + # Initialize the contents, and get the new section going. 
+ # + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + self.dump_section() + self.entry.begin_section(ln, newsection) + self.entry.leading_space = None + + self.entry.add_text(newcontents.lstrip()) + return True + return False + + def is_comment_end(self, ln, line): + """ + Helper function to detect (and effect) the end of a kerneldoc comment. + """ + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + <text> + doc_end: + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') + if r.match(line): + self.emit_msg(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = state.PROTO + return True + return False + + + def process_decl(self, ln, line): + """ + STATE_DECLARATION: We've seen the beginning of a declaration. + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # Look for anything with the " * " line beginning. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # A blank line means that we have moved out of the declaration + # part of the comment (without any "special section" parameter + # descriptions). + # + if cont == "": + self.state = state.BODY + # + # Otherwise we have more of the declaration section to soak up. + # + else: + self.entry.declaration_purpose = \ + trim_whitespace(self.entry.declaration_purpose + ' ' + cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + + def process_special(self, ln, line): + """ + STATE_SPECIAL_SECTION: a section ending with a blank line. + """ + # + # If we have hit a blank line (only the " * " marker), then this + # section is done. + # + if KernRe(r"\s*\*\s*$").match(line): + self.entry.begin_section(ln, dump = True) + self.state = state.BODY + return + # + # Not a blank line, look for the other ways to end the section. 
+ # + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # OK, we should have a continuation of the text for this section. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # If the lines of text after the first in a special section have + # leading white space, we need to trim it out or Sphinx will get + # confused. For the second line (the None case), see what we + # find there and remember it. + # + if self.entry.leading_space is None: + r = KernRe(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + # + # Otherwise, before trimming any leading chars, be *sure* + # that they are white space. We should maybe warn if this + # isn't the case. + # + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + self.entry.leading_space = i + break + # + # Add the trimmed result to the section and we're done. + # + self.entry.add_text(cont[self.entry.leading_space:]) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_body(self, ln, line): + """ + STATE_BODY: the bulk of a kerneldoc comment. + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + + if doc_content.search(line): + cont = doc_content.group(1) + self.entry.add_text(cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_inline_name(self, ln, line): + """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" + + if doc_inline_sect.search(line): + self.entry.begin_section(ln, doc_inline_sect.group(1)) + self.entry.add_text(doc_inline_sect.group(2).lstrip()) + self.state = state.INLINE_TEXT + elif doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") + self.state = state.PROTO + # else ... ?? 
def syscall_munge(self, ln, proto):         # pylint: disable=W0613
    """
    Rewrite a ``SYSCALL_DEFINEn(...)`` prototype as a ``long sys_*()`` one.

    The macro name is replaced by ``long sys_``, the first comma becomes
    the opening parenthesis (or ``(void)`` is inserted for
    ``SYSCALL_DEFINE0``), and every odd-numbered remaining comma is
    blanked so that each "type, name" pair reads as "type name".
    Returns the rewritten prototype string.
    """

    # Collapse embedded newlines/CRs into single spaces
    proto = re.sub(r'[\r\n]+', ' ', proto)

    # SYSCALL_DEFINE0 takes no arguments at all
    is_void = 'SYSCALL_DEFINE0' in proto

    # Replace SYSCALL_DEFINE with correct return type & function name
    proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)

    with_args = KernRe(r'long\s+(sys_.*?),')
    if with_args.search(proto):
        proto = KernRe(',').sub('(', proto, count=1)
    elif is_void:
        proto = KernRe(r'\)').sub('(void)', proto, count=1)

    # Nothing left to do for argument-less syscalls
    if is_void:
        return proto

    # Blank the odd-numbered commas so that argument types and names
    # don't have a comma between them
    seen = 0
    chars = list(proto)
    for idx, char in enumerate(chars):
        if char != ',':
            continue
        seen += 1
        if seen % 2 == 1:
            chars[idx] = ' '

    return "".join(chars)
+ """ + + tracepointname = None + tracepointargs = None + + # Match tracepoint name based on different patterns + r = KernRe(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname = r.group(2) + + if tracepointname: + tracepointname = tracepointname.lstrip() + + r = KernRe(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs = r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_msg(ln, + f"Unrecognized tracepoint format:\n{proto}\n") + else: + proto = f"static inline void trace_{tracepointname}({tracepointargs})" + self.entry.identifier = f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype.""" + + # strip C99-style comments to end of line + line = KernRe(r"//.*$", re.S).sub('', line) + # + # Soak up the line's worth of prototype text, stopping at { or ; if present. + # + if KernRe(r'\s*#\s*define').match(line): + self.entry.prototype = line + elif not line.startswith('#'): # skip other preprocessor stuff + r = KernRe(r'([^\{]*)') + if r.match(line): + self.entry.prototype += r.group(1) + " " + # + # If we now have the whole prototype, clean it up and declare victory. 
def process_proto_type(self, ln, line):
    """
    Ancillary routine to process one line of a type declaration.

    The line is appended, piece by piece, to ``self.entry.prototype``
    while ``self.entry.brcount`` tracks the brace nesting. Once a ``;``
    is seen outside of any braces the accumulated declaration is handed
    to dump_declaration() and the parser state is reset. If the line ends
    first, a single space is appended to stand in for the newline.
    """

    # Strip C99-style comments and surrounding whitespace.
    # re.S has to be passed by keyword: the second positional parameter
    # of KernRe() is ``cache``, not ``flags``.
    line = KernRe(r"//.*$", flags=re.S).sub('', line).strip()
    if not line:
        return  # nothing to see here

    # To distinguish preprocessor directive from regular declaration later.
    if line.startswith('#'):
        line += ";"
    #
    # Split the declaration on any of { } or ;, and accumulate pieces
    # until we hit a semicolon while not inside {brackets}
    #
    r = KernRe(r'(.*?)([{};])')
    for chunk in r.split(line):
        if chunk:  # Ignore empty matches
            self.entry.prototype += chunk
            #
            # This cries out for a match statement ... someday after we can
            # drop Python 3.9 ...
            #
            if chunk == '{':
                self.entry.brcount += 1
            elif chunk == '}':
                self.entry.brcount -= 1
            elif chunk == ';' and self.entry.brcount <= 0:
                self.dump_declaration(ln, self.entry.prototype)
                self.reset_state(ln)
                return
    #
    # We hit the end of the line while still in the declaration; put
    # in a space to represent the newline.
    #
    self.entry.prototype += ' '
# Local cache for compiled regular expressions, keyed by (pattern, flags)
re_cache = {}


class KernRe:
    """
    Helper class to simplify regex declaration and usage.

    It calls re.compile for a given pattern and remembers the result of
    the last match()/search() call, so that group() can be used right
    after testing for a match. Two instances can be concatenated with
    the ``+`` operator.

    Compiled expressions can be cached via an argument, helping to speed
    up searches.
    """

    def _add_regex(self, string, flags):
        """
        Adds a new regex or reuses it from the cache.

        The cache key includes ``flags``: the same pattern compiled with
        different flags is a different expression, so it must not be
        served from an entry created with other flags.
        """
        key = (string, flags)

        self.regex = re_cache.get(key)
        if self.regex is None:
            self.regex = re.compile(string, flags=flags)
            if self.cache:
                re_cache[key] = self.regex

    def __init__(self, string, cache=True, flags=0):
        """
        Compile a regular expression and initialize internal vars.

        ``cache`` tells whether a newly compiled expression is stored in
        the module-level cache; ``flags`` are handed to re.compile().
        """

        self.cache = cache
        self.last_match = None

        self._add_regex(string, flags)

    def __str__(self):
        """
        Return the regular expression pattern.
        """
        return self.regex.pattern

    def __repr__(self):
        return f're.compile("{self.regex.pattern}")'

    def __add__(self, other):
        """
        Allows adding two regular expressions into one.

        The patterns are concatenated and the flags of both operands are
        combined; the result is cached if either operand asked for it.
        """

        return KernRe(str(self) + str(other), cache=self.cache or other.cache,
                      flags=self.regex.flags | other.regex.flags)

    def match(self, string):
        """
        Handles a re.match storing its results.
        """

        self.last_match = self.regex.match(string)
        return self.last_match

    def search(self, string):
        """
        Handles a re.search storing its results.
        """

        self.last_match = self.regex.search(string)
        return self.last_match

    def findall(self, string):
        """
        Alias to re.findall.
        """

        return self.regex.findall(string)

    def split(self, string):
        """
        Alias to re.split.
        """

        return self.regex.split(string)

    def sub(self, sub, string, count=0):
        """
        Alias to re.sub.
        """

        return self.regex.sub(sub, string, count=count)

    def group(self, num):
        """
        Returns the group results of the last match.

        Only valid after a successful match() or search().
        """

        return self.last_match.group(num)
+ + This is the case of this pattern:: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parentheses of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + # TODO: make NestedMatch handle multiple match groups + # + # Right now, regular expressions to match it are defined only up to + # the start delimiter, e.g.: + # + # \bSTRUCT_GROUP\( + # + # is similar to: STRUCT_GROUP\((.*)\) + # except that the content inside the match group is delimiter-aligned. + # + # The content inside parentheses is converted into a single replace + # group (e.g. r`\1'). + # + # It would be nice to change such definition to support multiple + # match groups, allowing a regex equivalent to: + # + # FOO\((.*), (.*), (.*)\) + # + # it is probably easier to define it not as a regular expression, but + # with some lexical definition like: + # + # FOO(arg1, arg2, arg3) + + DELIMITER_PAIRS = { + '{': '}', + '(': ')', + '[': ']', + } + + RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') + + def _search(self, regex, line): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and places them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. 
def sub(self, regex, sub, line, count=0):
    r"""
    This is similar to re.sub:

    It matches a regex that it is followed by a delimiter,
    replacing occurrences only if all delimiters are paired.

    if the sub argument contains::

        r'\1'

    it will work just like re: it places there the matched paired data
    with the delimiter stripped.

    A ``;`` that immediately follows the closing delimiter is dropped
    together with the match.

    If count is different than zero, it will replace at most count
    items.

    Returns the resulting string.
    """
    out = ""

    cur_pos = 0
    n = 0

    for start, end, pos in self._search(regex, line):
        # An occurrence starting inside a span that was already replaced
        # (e.g. a nested use of the same macro) is part of that
        # replacement: handling it again would duplicate text.
        if start < cur_pos:
            continue

        out += line[cur_pos:start]

        # Value, ignoring start/end delimiters
        value = line[end:pos - 1]

        # replaces \1 at the sub string, if \1 is used there
        out += sub.replace(r'\1', value)

        # Drop end ';' if any. pos is one past the closing delimiter, so
        # it equals len(line) when the match ends the string.
        if pos < len(line) and line[pos] == ';':
            pos += 1

        cur_pos = pos
        n += 1

        # Stop once the requested number of replacements was made
        if count and n >= count:
            break

    # Append the remaining string
    out += line[cur_pos:]

    return out
[1] https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ +.. [2] https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ +.. [3] https://en.wikipedia.org/wiki/Variable_font +.. [4] https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts +.. [5] https://build.opensuse.org/request/show/1157217 + +Workarounds for building translations.pdf +----------------------------------------- + +* Denylist "variable font" Noto CJK fonts. + + - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with + tweaks if necessary. Remove leading "". + + - Path of fontconfig/fonts.conf can be overridden by setting an env + variable FONTS_CONF_DENY_VF. + + * Template:: + + <?xml version="1.0"?> + <!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd"> + <fontconfig> + <!-- + Ignore variable-font glob (not to break xetex) + --> + <selectfont> + <rejectfont> + <!-- + for Fedora + --> + <glob>/usr/share/fonts/google-noto-*-cjk-vf-fonts</glob> + <!-- + for openSUSE tumbleweed + --> + <glob>/usr/share/fonts/truetype/Noto*CJK*-VF.otf</glob> + </rejectfont> + </selectfont> + </fontconfig> + + The denylisting is activated for "make pdfdocs". + +* For skipping CJK pages in PDF + + - Uninstall texlive-xecjk. + Denylisting is not needed in this case. + +* For printing CJK pages in PDF + + - Need non-variable "Noto CJK" fonts. + + * Fedora + + - google-noto-sans-cjk-fonts + - google-noto-serif-cjk-fonts + + * openSUSE tumbleweed + + - Non-variable "Noto CJK" fonts are not available as distro packages + as of April, 2024. Fetch a set of font files from upstream Noto + CJK Font released at: + + https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc + + and at: + + https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc + + then uncompress and deploy them. + - Remember to update fontconfig cache by running fc-cache. + +.. caution:: + Uninstalling "variable font" packages can be dangerous. + They might be depended upon by other packages important for your work. 
def get_noto_cjk_vf_fonts(self):
    """
    Get the variable-font Noto CJK font files known to fontconfig.

    Runs ``fc-list`` with the environment prepared in ``self.environ``
    and keeps the file name of every output line that is flagged
    ``variable=True`` and matches ``self.re_cjk``.

    Returns a sorted list of font file names. Exits the program with an
    error message if ``fc-list`` fails or cannot be run at all.
    """

    cmd = ["fc-list", ":", "file", "family", "variable"]
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True,
                                env=self.environ,
                                check=True)

    except (subprocess.CalledProcessError, OSError) as exc:
        # CalledProcessError: fc-list returned a non-zero status.
        # OSError: fc-list is missing or could not be executed.
        sys.exit(f"Error running fc-list: {repr(exc)}")

    cjk_fonts = set()
    for line in result.stdout.splitlines():
        if 'variable=True' not in line:
            continue

        match = self.re_cjk.search(line)
        if match:
            cjk_fonts.add(match.group(1))

    return sorted(cjk_fonts)
+ + For more info on denylisting, other options, and variable font, run: + + tools/docs/check-variable-fonts.py -h + """) + msg += "=" * 77 + + return msg diff --git a/tools/lib/python/kdoc/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py new file mode 100755 index 000000000000..9941cd19032e --- /dev/null +++ b/tools/lib/python/kdoc/parse_data_structs.py @@ -0,0 +1,498 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>. +# pylint: disable=R0912,R0915 + +""" +Parse a source file or header, creating ReStructured Text cross references. + +It accepts an optional file to change the default symbol reference or to +suppress symbols from the output. + +It is capable of identifying ``define``, function, ``struct``, ``typedef``, +``enum`` and ``enum`` symbols and create cross-references for all of them. +It is also capable of distinguish #define used for specifying a Linux +ioctl. + +The optional rules file contains a set of rules like:: + + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` +""" + +import os +import re +import sys + + +class ParseDataStructs: + """ + Creates an enriched version of a Kernel header file with cross-links + to each C data structure type. + + It is meant to allow having a more comprehensive documentation, where + uAPI headers will create cross-reference links to the code. + + It is capable of identifying ``define``, function, ``struct``, ``typedef``, + ``enum`` and ``enum`` symbols and create cross-references for all of them. + It is also capable of distinguish #define used for specifying a Linux + ioctl. + + By default, it create rules for all symbols and defines, but it also + allows parsing an exception file. Such file contains a set of rules + using the syntax below: + + 1. 
Ignore rules:: + + ignore <type> <symbol>` + + Removes the symbol from reference generation. + + 2. Replace rules:: + + replace <type> <old_symbol> <new_reference> + + Replaces how old_symbol with a new reference. The new_reference can be: + + - A simple symbol name; + - A full Sphinx reference. + + 3. Namespace rules:: + + namespace <namespace> + + Sets C namespace to be used during cross-reference generation. Can + be overridden by replace rules. + + On ignore and replace rules, ``<type>`` can be: + - ``ioctl``: for defines that end with ``_IO*``, e.g. ioctl definitions + - ``define``: for other defines + - ``symbol``: for symbols defined within enums; + - ``typedef``: for typedefs; + - ``enum``: for the name of a non-anonymous enum; + - ``struct``: for structs. + + Examples:: + + ignore define __LINUX_MEDIA_H + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` + + namespace MC + """ + + #: Parser regex with multiple ways to capture enums. + RE_ENUMS = [ + re.compile(r"^\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*enum\s+([\w_]+)\s*$"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"), + ] + + #: Parser regex with multiple ways to capture structs. + RE_STRUCTS = [ + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"), + ] + + # NOTE: the original code was written a long time before Sphinx C + # domain to have multiple namespaces. To avoid to much turn at the + # existing hyperlinks, the code kept using "c:type" instead of the + # right types. To change that, we need to change the types not only + # here, but also at the uAPI media documentation. + + #: Dictionary containing C type identifiers to be transformed. 
def read_exceptions(self, fname: str):
    """
    Read an optional exceptions file, used to override defaults.

    Blank lines and lines starting with ``#`` are skipped. ``ignore`` and
    ``replace`` rules are queued, together with their line number, in
    ``self.ignore`` and ``self.replace``; a ``namespace`` rule sets
    ``self.namespace``. Any other line aborts the program.

    The base name of the file is kept in ``self.exceptions_name`` so that
    apply_exceptions() can prefix its messages with it.
    """

    if not fname:
        return

    name = os.path.basename(fname)
    self.exceptions_name = name

    with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
        for ln, line in enumerate(f, start=1):
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            # ignore rules
            match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)

            if match:
                self.ignore.append((ln, match.group(1), match.group(2)))
                continue

            # replace rules
            match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
            if match:
                self.replace.append((ln, match.group(1), match.group(2),
                                     match.group(3)))
                continue

            match = re.match(r"^namespace\s+(\S+)", line)
            if match:
                self.namespace = match.group(1)
                continue

            sys.exit(f"{name}:{ln}: invalid line: {line}")

def apply_exceptions(self):
    """
    Process exceptions file with rules to ignore or replace references.

    Ignore rules remove the symbol from ``self.symbols``; replace rules
    swap the stored reference of a symbol for the new one, keeping the
    line number where the symbol was found. A rule with an unknown type
    aborts the program; a replace rule for an unknown symbol only prints
    a warning.
    """

    # File name used to prefix diagnostics. It is recorded by
    # read_exceptions(); fall back to a placeholder if rules were queued
    # by other means.
    name = getattr(self, "exceptions_name", None) or "<exceptions>"

    # Handle ignore rules
    for ln, c_type, symbol in self.ignore:
        if c_type not in self.DEF_SYMBOL_TYPES:
            sys.exit(f"{name}:{ln}: {c_type} is invalid")

        d = self.symbols[c_type]
        if symbol in d:
            del d[symbol]

    # Handle replace rules
    for ln, c_type, old, new in self.replace:
        if c_type not in self.DEF_SYMBOL_TYPES:
            sys.exit(f"{name}:{ln}: {c_type} is invalid")

        reftype = None

        # Parse reference type when the type is specified

        match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
        if match:
            reftype = f":c:{match.group(1)}"
            new = match.group(2)
        else:
            match = re.search(r"(\:ref)\:\`(.+)\`", new)
            if match:
                reftype = match.group(1)
                new = match.group(2)

        # If the replacement rule doesn't have a type, get default
        if not reftype:
            reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
            if not reftype:
                reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")

        new_ref = f"{reftype}:`{old} <{new}>`"

        # Change self.symbols to use the replacement rule
        if old in self.symbols[c_type]:
            # Keep the line number where the symbol itself was found
            (_, symbol_ln) = self.symbols[c_type][old]
            self.symbols[c_type][old] = (new_ref, symbol_ln)
        else:
            print(f"{name}:{ln}: Warning: can't find {old} {c_type}")
They can be multilined + if not is_comment: + if re.search(r"/\*.*", line): + is_comment = True + else: + # Strip C99-style comments + line = re.sub(r"(//.*)", "", line) + + if is_comment: + if re.search(r".*\*/", line): + is_comment = False + else: + multiline = line + continue + + # At this point, line variable may be a multilined statement, + # if lines end with \ or if they have multi-line comments + # With that, it can safely remove the entire comments, + # and there's no need to use re.DOTALL for the logic below + + line = re.sub(r"(/\*.*\*/)", "", line) + if not line.strip(): + continue + + # It can be useful for debug purposes to print the file after + # having comments stripped and multi-lines grouped. + if self.debug > 1: + print(f"line {line_no + 1}: {line}") + + # Now the fun begins: parse each type and store it. + + # We opted for a two parsing logic here due to: + # 1. it makes easier to debug issues not-parsed symbols; + # 2. we want symbol replacement at the entire content, not + # just when the symbol is detected. 
+ + if is_enum: + match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line) + if match: + self.store_type(line_no, "symbol", match.group(1)) + if "}" in line: + is_enum = False + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line) + if match: + self.store_type(line_no, "ioctl", match.group(1), + replace_underscores=False) + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line) + if match: + self.store_type(line_no, "define", match.group(1)) + continue + + match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);", + line) + if match: + name = match.group(2).strip() + symbol = match.group(3) + self.store_type(line_no, "typedef", symbol, ref_name=name) + continue + + for re_enum in self.RE_ENUMS: + match = re_enum.match(line) + if match: + self.store_type(line_no, "enum", match.group(1)) + is_enum = True + break + + for re_struct in self.RE_STRUCTS: + match = re_struct.match(line) + if match: + self.store_type(line_no, "struct", match.group(1)) + break + + self.apply_exceptions() + + def debug_print(self): + """ + Print debug information containing the replacement rules per symbol. + To make easier to check, group them per type. + """ + if not self.debug: + return + + for c_type, refs in self.symbols.items(): + if not refs: # Skip empty dictionaries + continue + + print(f"{c_type}:") + + for symbol, (ref, ln) in sorted(refs.items()): + print(f" #{ln:<5d} {symbol} -> {ref}") + + print() + + def gen_output(self): + """Write the formatted output to a file.""" + + # Avoid extra blank lines + text = re.sub(r"\s+$", "", self.data) + "\n" + text = re.sub(r"\n\s+\n", "\n\n", text) + + # Escape Sphinx special characters + text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text) + + # Source uAPI files may have special notes. 
def gen_toc(self):
    """
    Build the text of a table of contents listing every stored symbol.

    Symbol types are emitted in the alphabetical order of their
    description, each as an underlined heading followed by one
    ``- LINENO_<line>: <reference>`` entry per symbol, sorted by symbol
    name. Types without symbols are left out. Returns the text as a
    single string.
    """
    # (description, type) pairs, ordered by description
    headings = sorted((defs['description'], kind)
                      for kind, defs in self.DEF_SYMBOL_TYPES.items())

    lines = []
    for heading, kind in headings:
        entries = self.symbols[kind]
        if not entries:
            # Skip empty categories
            continue

        lines += [f"{heading}", "-" * len(heading), ""]

        # One entry per symbol, alphabetically
        lines.extend(f"- LINENO_{ln}: {ref}"
                     for _, (ref, ln) in sorted(entries.items()))

        # Empty line between categories
        lines.append("")

    return "\n".join(lines)
parsed-literal::\n\n") + + f.write(text) diff --git a/tools/lib/python/kdoc/python_version.py b/tools/lib/python/kdoc/python_version.py new file mode 100644 index 000000000000..4ddb7ead5f56 --- /dev/null +++ b/tools/lib/python/kdoc/python_version.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> + +""" +Handle Python version check logic. + +Not all Python versions are supported by scripts. Yet, in some cases, +like during documentation build, a newer version of python could be +available. + +This class allows checking if the minimal requirements are followed. + +Better than that, PythonVersion.check_python() not only checks the minimal +requirements, but it automatically switches to the newest available +Python version if present. + +""" + +import os +import re +import subprocess +import shlex +import sys + +from glob import glob +from textwrap import indent + +class PythonVersion: + """ + Ancillary methods that checks for missing dependencies for different + types of types, like binaries, python modules, rpm deps, etc. + """ + + def __init__(self, version): + """ + Initialize self.version tuple from a version string. + """ + self.version = self.parse_version(version) + + @staticmethod + def parse_version(version): + """ + Convert a major.minor.patch version into a tuple. + """ + return tuple(int(x) for x in version.split(".")) + + @staticmethod + def ver_str(version): + """ + Returns a version tuple as major.minor.patch. + """ + return ".".join([str(x) for x in version]) + + @staticmethod + def cmd_print(cmd, max_len=80): + """ + Outputs a command line, respecting maximum width.
+ """ + + cmd_line = [] + + for w in cmd: + w = shlex.quote(w) + + if cmd_line: + if not max_len or len(cmd_line[-1]) + len(w) < max_len: + cmd_line[-1] += " " + w + continue + else: + cmd_line[-1] += " \\" + cmd_line.append(w) + else: + cmd_line.append(w) + + return "\n ".join(cmd_line) + + def __str__(self): + """ + Return a version tuple as major.minor.patch from self.version. + """ + return self.ver_str(self.version) + + @staticmethod + def get_python_version(cmd): + """ + Get python version from a Python binary. As we need to detect if + are out there newer python binaries, we can't rely on sys.release here. + """ + + kwargs = {} + if sys.version_info < (3, 7): + kwargs['universal_newlines'] = True + else: + kwargs['text'] = True + + result = subprocess.run([cmd, "--version"], + stdout = subprocess.PIPE, + stderr = subprocess.PIPE, + **kwargs, check=False) + + version = result.stdout.strip() + + match = re.search(r"(\d+\.\d+\.\d+)", version) + if match: + return PythonVersion.parse_version(match.group(1)) + + print(f"Can't parse version {version}") + return (0, 0, 0) + + @staticmethod + def find_python(min_version): + """ + Detect if are out there any python 3.xy version newer than the + current one. + + Note: this routine is limited to up to 2 digits for python3. We + may need to update it one day, hopefully on a distant future. 
+ """ + patterns = [ + "python3.[0-9][0-9]", + "python3.[0-9]", + ] + + python_cmd = [] + + # Seek for a python binary newer than min_version + for path in os.getenv("PATH", "").split(":"): + for pattern in patterns: + for cmd in glob(os.path.join(path, pattern)): + if os.path.isfile(cmd) and os.access(cmd, os.X_OK): + version = PythonVersion.get_python_version(cmd) + if version >= min_version: + python_cmd.append((version, cmd)) + + return sorted(python_cmd, reverse=True) + + @staticmethod + def check_python(min_version, show_alternatives=False, bail_out=False, + success_on_error=False): + """ + Check if the current python binary satisfies our minimal requirement + for Sphinx build. If not, re-run with a newer version if found. + """ + cur_ver = sys.version_info[:3] + if cur_ver >= min_version: + ver = PythonVersion.ver_str(cur_ver) + return + + python_ver = PythonVersion.ver_str(cur_ver) + + available_versions = PythonVersion.find_python(min_version) + if not available_versions: + print(f"ERROR: Python version {python_ver} is not supported anymore\n") + print(" Can't find a new version. This script may fail") + return + + script_path = os.path.abspath(sys.argv[0]) + + # Check possible alternatives + if available_versions: + new_python_cmd = available_versions[0][1] + else: + new_python_cmd = None + + if show_alternatives and available_versions: + print("You could run, instead:") + for _, cmd in available_versions: + args = [cmd, script_path] + sys.argv[1:] + + cmd_str = indent(PythonVersion.cmd_print(args), " ") + print(f"{cmd_str}\n") + + if bail_out: + msg = f"Python {python_ver} not supported. Bailing out" + if success_on_error: + print(msg, file=sys.stderr) + sys.exit(0) + else: + sys.exit(msg) + + print(f"Python {python_ver} not supported. 
Changing to {new_python_cmd}") + + # Restart script using the newer version + args = [new_python_cmd, script_path] + sys.argv[1:] + + try: + os.execv(new_python_cmd, args) + except OSError as e: + sys.exit(f"Failed to restart with {new_python_cmd}: {e}") diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index ddaeb4eb3e24..db94aa685b73 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -97,11 +97,13 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) ei++; } } - if (ci != cj) { - while (ci < cmds->cnt) { - cmds->names[cj++] = cmds->names[ci]; - cmds->names[ci++] = NULL; + while (ci < cmds->cnt) { + if (ci != cj) { + cmds->names[cj] = cmds->names[ci]; + cmds->names[ci] = NULL; } + ci++; + cj++; } for (ci = cj; ci < cmds->cnt; ci++) assert(cmds->names[ci] == NULL); diff --git a/tools/lib/thermal/libthermal.pc.template b/tools/lib/thermal/libthermal.pc.template index ac24d0ab17f5..3b8a24d0a8b8 100644 --- a/tools/lib/thermal/libthermal.pc.template +++ b/tools/lib/thermal/libthermal.pc.template @@ -8,5 +8,5 @@ Name: libthermal Description: thermal library Requires: libnl-3.0 libnl-genl-3.0 Version: @VERSION@ -Libs: -L${libdir} -lnl-genl-3 -lnl-3 -Cflags: -I${includedir} -I${include}/libnl3 +Libs: -L${libdir} -lnl-genl-3 -lnl-3 -lthermal +Cflags: -I${includedir} -I${includedir}/libnl3 |
