diff options
| field | value | date |
|---|---|---|
| author | Thomas Gleixner <tglx@kernel.org> | 2026-05-18 12:01:07 +0300 |
| committer | Thomas Gleixner <tglx@kernel.org> | 2026-05-18 12:01:07 +0300 |
| commit | 09d6818d3bdc1ea6e49a425040528cbdbc97bc0a (patch) | |
| tree | 989f9d94c592294e4ae5421c9cbcbad99eee996c /tools/lib | |
| parent | 1655f6895a896eb632ca8a019259bc5d358a9712 (diff) | |
| parent | 5200f5f493f79f14bbdc349e402a40dfb32f23c8 (diff) | |
| download | linux-09d6818d3bdc1ea6e49a425040528cbdbc97bc0a.tar.xz | |
Merge branch 'linus' into timers/clocksource
... to bring it up to date for new changes.
Diffstat (limited to 'tools/lib')
26 files changed, 2866 insertions, 702 deletions
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index aa83d22c45e3..fedc9070f0e4 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -169,3 +169,13 @@ bool __bitmap_subset(const unsigned long *bitmap1, return false; return true; } + +void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] ^ bitmap2[k]; +} diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 83fe79ffcb8f..ceb57b46a878 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -29,6 +29,36 @@ static struct btf_type btf_void; +/* + * Describe how kinds are laid out; some have a singular element following the "struct btf_type", + * some have BTF_INFO_VLEN(t->info) elements. Specify sizes for both. Flags are currently unused. + * Kind layout can be optionally added to the BTF representation in a dedicated section to + * facilitate parsing. New kinds must be added here. 
+ */ +static struct btf_layout layouts[NR_BTF_KINDS] = { +/* singular element size vlen element(s) size flags */ +[BTF_KIND_UNKN] = { 0, 0, 0 }, +[BTF_KIND_INT] = { sizeof(__u32), 0, 0 }, +[BTF_KIND_PTR] = { 0, 0, 0 }, +[BTF_KIND_ARRAY] = { sizeof(struct btf_array), 0, 0 }, +[BTF_KIND_STRUCT] = { 0, sizeof(struct btf_member), 0 }, +[BTF_KIND_UNION] = { 0, sizeof(struct btf_member), 0 }, +[BTF_KIND_ENUM] = { 0, sizeof(struct btf_enum), 0 }, +[BTF_KIND_FWD] = { 0, 0, 0 }, +[BTF_KIND_TYPEDEF] = { 0, 0, 0 }, +[BTF_KIND_VOLATILE] = { 0, 0, 0 }, +[BTF_KIND_CONST] = { 0, 0, 0 }, +[BTF_KIND_RESTRICT] = { 0, 0, 0 }, +[BTF_KIND_FUNC] = { 0, 0, 0 }, +[BTF_KIND_FUNC_PROTO] = { 0, sizeof(struct btf_param), 0 }, +[BTF_KIND_VAR] = { sizeof(struct btf_var), 0, 0 }, +[BTF_KIND_DATASEC] = { 0, sizeof(struct btf_var_secinfo), 0 }, +[BTF_KIND_FLOAT] = { 0, 0, 0 }, +[BTF_KIND_DECL_TAG] = { sizeof(struct btf_decl_tag), 0, 0 }, +[BTF_KIND_TYPE_TAG] = { 0, 0, 0 }, +[BTF_KIND_ENUM64] = { 0, sizeof(struct btf_enum64), 0 }, +}; + struct btf { /* raw BTF data in native endianness */ void *raw_data; @@ -40,42 +70,53 @@ struct btf { /* * When BTF is loaded from an ELF or raw memory it is stored - * in a contiguous memory block. The hdr, type_data, and, strs_data + * in a contiguous memory block. 
The type_data, layout and strs_data * point inside that memory region to their respective parts of BTF * representation: * - * +--------------------------------+ - * | Header | Types | Strings | - * +--------------------------------+ - * ^ ^ ^ - * | | | - * hdr | | - * types_data-+ | - * strs_data------------+ + * +----------------------------------------+---------------+ + * | Header | Types | Optional layout | Strings | + * +--------------------------------------------------------+ + * ^ ^ ^ ^ + * | | | | + * raw_data | | | + * types_data-+ | | + * layout---------------+ | + * strs_data--------------------------------+ + * + * A separate struct btf_header is embedded as btf->hdr, + * and header information is copied into it. This allows us + * to handle header data for various header formats; the original, + * the extended header with layout info, etc. * * If BTF data is later modified, e.g., due to types added or * removed, BTF deduplication performed, etc, this contiguous - * representation is broken up into three independently allocated - * memory regions to be able to modify them independently. + * representation is broken up into four independent memory + * regions. + * * raw_data is nulled out at that point, but can be later allocated * and cached again if user calls btf__raw_data(), at which point - * raw_data will contain a contiguous copy of header, types, and - * strings: + * raw_data will contain a contiguous copy of header, types, optional + * layout and strings. layout optionally points to a + * btf_layout array - this allows us to encode information about + * the kinds known at encoding time. If layout is NULL no + * layout information is encoded. 
* - * +----------+ +---------+ +-----------+ - * | Header | | Types | | Strings | - * +----------+ +---------+ +-----------+ - * ^ ^ ^ - * | | | - * hdr | | - * types_data----+ | - * strset__data(strs_set)-----+ + * +----------+ +---------+ +-----------+ +-----------+ + * | Header | | Types | | Layout | | Strings | + * +----------+ +---------+ +-----------+ +-----------+ + * ^ ^ ^ ^ + * | | | | + * hdr | | | + * types_data----+ | | + * layout---------------------+ | + * strset__data(strs_set)---------------------+ * - * +----------+---------+-----------+ - * | Header | Types | Strings | - * raw_data----->+----------+---------+-----------+ + * +----------+---------+-------------------+-----------+ + * | Header | Types | Optional Layout | Strings | + * raw_data----->+----------+---------+-------------------+-----------+ */ - struct btf_header *hdr; + struct btf_header hdr; void *types_data; size_t types_data_cap; /* used size stored in hdr->type_len */ @@ -125,6 +166,17 @@ struct btf { /* whether raw_data is a (read-only) mmap */ bool raw_data_is_mmap; + /* is BTF modifiable? i.e. is it split into separate sections as described above? */ + bool modifiable; + /* does BTF have header information we do not support? If so, disallow + * modification. + */ + bool has_hdr_extra; + /* Points either at raw kind layout data in parsed BTF (if present), or + * at an allocated kind layout array when BTF is modifiable. 
+ */ + void *layout; + /* BTF object FD, if loaded into kernel */ int fd; @@ -216,7 +268,7 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) return 0; } -static void btf_bswap_hdr(struct btf_header *h) +static void btf_bswap_hdr(struct btf_header *h, __u32 hdr_len) { h->magic = bswap_16(h->magic); h->hdr_len = bswap_32(h->hdr_len); @@ -224,66 +276,115 @@ static void btf_bswap_hdr(struct btf_header *h) h->type_len = bswap_32(h->type_len); h->str_off = bswap_32(h->str_off); h->str_len = bswap_32(h->str_len); + /* May be operating on raw data with hdr_len that does not include below fields */ + if (hdr_len >= sizeof(struct btf_header)) { + h->layout_off = bswap_32(h->layout_off); + h->layout_len = bswap_32(h->layout_len); + } } static int btf_parse_hdr(struct btf *btf) { - struct btf_header *hdr = btf->hdr; - __u32 meta_left; + struct btf_header *hdr = btf->raw_data; + __u32 hdr_len, meta_left; - if (btf->raw_size < sizeof(struct btf_header)) { + if (btf->raw_size < offsetofend(struct btf_header, str_len)) { pr_debug("BTF header not found\n"); return -EINVAL; } + hdr_len = hdr->hdr_len; + if (hdr->magic == bswap_16(BTF_MAGIC)) { btf->swapped_endian = true; - if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) { + hdr_len = bswap_32(hdr->hdr_len); + if (hdr_len < offsetofend(struct btf_header, str_len)) { pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n", - bswap_32(hdr->hdr_len)); + hdr_len); return -ENOTSUP; } - btf_bswap_hdr(hdr); } else if (hdr->magic != BTF_MAGIC) { pr_debug("Invalid BTF magic: %x\n", hdr->magic); return -EINVAL; } - if (btf->raw_size < hdr->hdr_len) { + if (btf->raw_size < hdr_len) { pr_debug("BTF header len %u larger than data size %u\n", - hdr->hdr_len, btf->raw_size); + hdr_len, btf->raw_size); return -EINVAL; } - meta_left = btf->raw_size - hdr->hdr_len; - if (meta_left < (long long)hdr->str_off + hdr->str_len) { + if (btf->swapped_endian) + btf_bswap_hdr(hdr, hdr_len); + + 
memcpy(&btf->hdr, hdr, min((size_t)hdr_len, sizeof(struct btf_header))); + + /* If unknown header data is found, modification is prohibited in + * btf_ensure_modifiable(). + */ + if (hdr_len > sizeof(struct btf_header)) { + __u8 *h = (__u8 *)hdr; + __u32 i; + + for (i = sizeof(struct btf_header); i < hdr_len; i++) { + if (!h[i]) + continue; + btf->has_hdr_extra = true; + pr_debug("Unknown BTF header data at offset %u; modification is disallowed\n", + i); + break; + } + } + + meta_left = btf->raw_size - hdr_len; + if (meta_left < (long long)btf->hdr.str_off + btf->hdr.str_len) { pr_debug("Invalid BTF total size: %u\n", btf->raw_size); return -EINVAL; } - if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) { + if ((long long)btf->hdr.type_off + btf->hdr.type_len > btf->hdr.str_off) { pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", - hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); + btf->hdr.type_off, btf->hdr.type_len, btf->hdr.str_off, + btf->hdr.str_len); return -EINVAL; } - if (hdr->type_off % 4) { + if (btf->hdr.type_off % 4) { pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } + if (btf->hdr.layout_len == 0) + return 0; + + /* optional layout section sits between types and strings */ + if (btf->hdr.layout_off % 4) { + pr_debug("BTF layout section is not aligned to 4 bytes\n"); + return -EINVAL; + } + if (btf->hdr.layout_off < (long long)btf->hdr.type_off + btf->hdr.type_len) { + pr_debug("Invalid BTF data sections layout: type data at %u + %u, layout data at %u + %u\n", + btf->hdr.type_off, btf->hdr.type_len, + btf->hdr.layout_off, btf->hdr.layout_len); + return -EINVAL; + } + if ((long long)btf->hdr.layout_off + btf->hdr.layout_len > btf->hdr.str_off || + btf->hdr.layout_off > btf->hdr.str_off) { + pr_debug("Invalid BTF data sections layout: layout data at %u + %u, strings data at %u\n", + btf->hdr.layout_off, btf->hdr.layout_len, btf->hdr.str_off); + return -EINVAL; + } 
return 0; } static int btf_parse_str_sec(struct btf *btf) { - const struct btf_header *hdr = btf->hdr; const char *start = btf->strs_data; - const char *end = start + btf->hdr->str_len; + const char *end = start + btf->hdr.str_len; - if (btf->base_btf && hdr->str_len == 0) + if (btf->base_btf && btf->hdr.str_len == 0) return 0; - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) { + if (!btf->hdr.str_len || btf->hdr.str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) { pr_debug("Invalid BTF string section\n"); return -EINVAL; } @@ -294,7 +395,63 @@ static int btf_parse_str_sec(struct btf *btf) return 0; } -static int btf_type_size(const struct btf_type *t) +static int btf_parse_layout_sec(struct btf *btf) +{ + if (!btf->hdr.layout_len) + return 0; + + if (btf->hdr.layout_len % sizeof(struct btf_layout) != 0) { + pr_debug("Invalid BTF kind layout section\n"); + return -EINVAL; + } + btf->layout = btf->raw_data + btf->hdr.hdr_len + btf->hdr.layout_off; + + if (btf->swapped_endian) { + struct btf_layout *l, *end = btf->layout + btf->hdr.layout_len; + + for (l = btf->layout; l < end; l++) + l->flags = bswap_16(l->flags); + } + + return 0; +} + +/* for unknown kinds, consult kind layout. 
*/ +static int btf_type_size_unknown(const struct btf *btf, const struct btf_type *t) +{ + __u32 l_cnt = btf->hdr.layout_len / sizeof(struct btf_layout); + struct btf_layout *l = btf->layout; + __u16 vlen = btf_vlen(t); + __u32 kind = btf_kind(t); + + /* Fall back to base BTF if needed as they share layout information */ + if (!l) { + struct btf *base_btf = btf->base_btf; + + if (base_btf) { + l = base_btf->layout; + l_cnt = base_btf->hdr.layout_len / sizeof(struct btf_layout); + } + } + if (!l || kind >= l_cnt) { + pr_debug("Unsupported BTF_KIND: %u\n", btf_kind(t)); + return -EINVAL; + } + if (l[kind].info_sz % 4) { + pr_debug("Unsupported info_sz %u for kind %u\n", + l[kind].info_sz, kind); + return -EINVAL; + } + if (l[kind].elem_sz % 4) { + pr_debug("Unsupported elem_sz %u for kind %u\n", + l[kind].elem_sz, kind); + return -EINVAL; + } + + return sizeof(struct btf_type) + l[kind].info_sz + vlen * l[kind].elem_sz; +} + +static int btf_type_size(const struct btf *btf, const struct btf_type *t) { const int base_size = sizeof(struct btf_type); __u16 vlen = btf_vlen(t); @@ -330,8 +487,7 @@ static int btf_type_size(const struct btf_type *t) case BTF_KIND_DECL_TAG: return base_size + sizeof(struct btf_decl_tag); default: - pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); - return -EINVAL; + return btf_type_size_unknown(btf, t); } } @@ -421,16 +577,15 @@ static int btf_bswap_type_rest(struct btf_type *t) static int btf_parse_type_sec(struct btf *btf) { - struct btf_header *hdr = btf->hdr; void *next_type = btf->types_data; - void *end_type = next_type + hdr->type_len; + void *end_type = next_type + btf->hdr.type_len; int err, type_size; while (next_type + sizeof(struct btf_type) <= end_type) { if (btf->swapped_endian) btf_bswap_type_base(next_type); - type_size = btf_type_size(next_type); + type_size = btf_type_size(btf, next_type); if (type_size < 0) return type_size; if (next_type + type_size > end_type) { @@ -591,8 +746,12 @@ static int btf_validate_type(const 
struct btf *btf, const struct btf_type *t, __ break; } default: - pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind); - return -EINVAL; + /* Kind may be represented in kind layout information. */ + if (btf_type_size_unknown(btf, t) < 0) { + pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind); + return -EINVAL; + } + break; } return 0; } @@ -1012,7 +1171,8 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, static bool btf_is_modifiable(const struct btf *btf) { - return (void *)btf->hdr != btf->raw_data; + /* BTF is modifiable if split into multiple sections */ + return btf->modifiable; } static void btf_free_raw_data(struct btf *btf) @@ -1036,14 +1196,14 @@ void btf__free(struct btf *btf) if (btf_is_modifiable(btf)) { /* if BTF was modified after loading, it will have a split - * in-memory representation for header, types, and strings + * in-memory representation for types, strings and layout * sections, so we need to free all of them individually. It * might still have a cached contiguous raw data present, * which will be unconditionally freed below. 
*/ - free(btf->hdr); free(btf->types_data); strset__free(btf->strs_set); + free(btf->layout); } btf_free_raw_data(btf); free(btf->raw_data_swapped); @@ -1053,8 +1213,11 @@ void btf__free(struct btf *btf) free(btf); } -static struct btf *btf_new_empty(struct btf *base_btf) +static struct btf *btf_new_empty(struct btf_new_opts *opts) { + bool add_layout = OPTS_GET(opts, add_layout, false); + struct btf *base_btf = OPTS_GET(opts, base_btf, NULL); + struct btf_header *hdr; struct btf *btf; btf = calloc(1, sizeof(*btf)); @@ -1072,26 +1235,42 @@ static struct btf *btf_new_empty(struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; + btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off; btf->swapped_endian = base_btf->swapped_endian; } /* +1 for empty string at offset 0 */ btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1); + if (add_layout) + btf->raw_size += sizeof(layouts); btf->raw_data = calloc(1, btf->raw_size); if (!btf->raw_data) { free(btf); return ERR_PTR(-ENOMEM); } - btf->hdr = btf->raw_data; - btf->hdr->hdr_len = sizeof(struct btf_header); - btf->hdr->magic = BTF_MAGIC; - btf->hdr->version = BTF_VERSION; + hdr = btf->raw_data; + hdr->hdr_len = sizeof(struct btf_header); + hdr->magic = BTF_MAGIC; + hdr->version = BTF_VERSION; - btf->types_data = btf->raw_data + btf->hdr->hdr_len; - btf->strs_data = btf->raw_data + btf->hdr->hdr_len; - btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */ + btf->types_data = btf->raw_data + hdr->hdr_len; + btf->strs_data = btf->raw_data + hdr->hdr_len; + hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */ + + if (add_layout) { + hdr->layout_len = sizeof(layouts); + btf->layout = layouts; + /* + * No need to swap endianness here as btf_get_raw_data() + * will do this for us if btf->swapped_endian is true. 
+ */ + memcpy(btf->raw_data + hdr->hdr_len, layouts, sizeof(layouts)); + btf->strs_data += sizeof(layouts); + hdr->str_off += sizeof(layouts); + } + + memcpy(&btf->hdr, hdr, sizeof(*hdr)); return btf; } @@ -1103,7 +1282,19 @@ struct btf *btf__new_empty(void) struct btf *btf__new_empty_split(struct btf *base_btf) { - return libbpf_ptr(btf_new_empty(base_btf)); + LIBBPF_OPTS(btf_new_opts, opts); + + opts.base_btf = base_btf; + + return libbpf_ptr(btf_new_empty(&opts)); +} + +struct btf *btf__new_empty_opts(struct btf_new_opts *opts) +{ + if (!OPTS_VALID(opts, btf_new_opts)) + return libbpf_err_ptr(-EINVAL); + + return libbpf_ptr(btf_new_empty(opts)); } static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap) @@ -1124,7 +1315,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; + btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off; } if (is_mmap) { @@ -1141,15 +1332,15 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b btf->raw_size = size; - btf->hdr = btf->raw_data; err = btf_parse_hdr(btf); if (err) goto done; - btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off; - btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off; + btf->strs_data = btf->raw_data + btf->hdr.hdr_len + btf->hdr.str_off; + btf->types_data = btf->raw_data + btf->hdr.hdr_len + btf->hdr.type_off; err = btf_parse_str_sec(btf); + err = err ?: btf_parse_layout_sec(btf); err = err ?: btf_parse_type_sec(btf); err = err ?: btf_sanity_check(btf); if (err) @@ -1601,7 +1792,7 @@ static const void *btf_strs_data(const struct btf *btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian) { - struct btf_header *hdr = btf->hdr; + const struct btf_header *hdr = &btf->hdr; 
struct btf_type *t; void *data, *p; __u32 data_sz; @@ -1614,14 +1805,17 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi } data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len; + if (btf->layout) + data_sz += hdr->layout_len; + data = calloc(1, data_sz); if (!data) return NULL; p = data; - memcpy(p, hdr, hdr->hdr_len); + memcpy(p, hdr, min((__u32)sizeof(struct btf_header), hdr->hdr_len)); if (swap_endian) - btf_bswap_hdr(p); + btf_bswap_hdr(p, hdr->hdr_len); p += hdr->hdr_len; memcpy(p, btf->types_data, hdr->type_len); @@ -1639,8 +1833,18 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi } p += hdr->type_len; + if (btf->layout) { + memcpy(p, btf->layout, hdr->layout_len); + if (swap_endian) { + struct btf_layout *l, *end = p + hdr->layout_len; + + for (l = p; l < end ; l++) + l->flags = bswap_16(l->flags); + } + p += hdr->layout_len; + } + memcpy(p, btf_strs_data(btf), hdr->str_len); - p += hdr->str_len; *size = data_sz; return data; @@ -1675,7 +1879,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->start_str_off) return btf__str_by_offset(btf->base_btf, offset); - else if (offset - btf->start_str_off < btf->hdr->str_len) + else if (offset - btf->start_str_off < btf->hdr.str_len) return btf_strs_data(btf) + (offset - btf->start_str_off); else return errno = EINVAL, NULL; @@ -1783,12 +1987,12 @@ static void btf_invalidate_raw_data(struct btf *btf) } /* Ensure BTF is ready to be modified (by splitting into a three memory - * regions for header, types, and strings). Also invalidate cached + * regions for types, strings and layout. Also invalidate cached * raw_data, if any. 
*/ static int btf_ensure_modifiable(struct btf *btf) { - void *hdr, *types; + void *types, *layout = NULL; struct strset *set = NULL; int err = -ENOMEM; @@ -1798,45 +2002,58 @@ static int btf_ensure_modifiable(struct btf *btf) return 0; } - /* split raw data into three memory regions */ - hdr = malloc(btf->hdr->hdr_len); - types = malloc(btf->hdr->type_len); - if (!hdr || !types) + if (btf->has_hdr_extra) { + /* Additional BTF header data was found; not safe to modify. */ + return -EOPNOTSUPP; + } + + /* split raw data into memory regions; btf->hdr is done already. */ + types = malloc(btf->hdr.type_len); + if (!types) goto err_out; + memcpy(types, btf->types_data, btf->hdr.type_len); - memcpy(hdr, btf->hdr, btf->hdr->hdr_len); - memcpy(types, btf->types_data, btf->hdr->type_len); + if (btf->hdr.layout_len) { + layout = malloc(btf->hdr.layout_len); + if (!layout) + goto err_out; + memcpy(layout, btf->raw_data + btf->hdr.hdr_len + btf->hdr.layout_off, + btf->hdr.layout_len); + } /* build lookup index for all strings */ - set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len); + set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr.str_len); if (IS_ERR(set)) { err = PTR_ERR(set); goto err_out; } /* only when everything was successful, update internal state */ - btf->hdr = hdr; btf->types_data = types; - btf->types_data_cap = btf->hdr->type_len; + btf->types_data_cap = btf->hdr.type_len; btf->strs_data = NULL; btf->strs_set = set; + if (layout) + btf->layout = layout; /* if BTF was created from scratch, all strings are guaranteed to be * unique and deduplicated */ - if (btf->hdr->str_len == 0) + if (btf->hdr.str_len == 0) btf->strs_deduped = true; - if (!btf->base_btf && btf->hdr->str_len == 1) + if (!btf->base_btf && btf->hdr.str_len == 1) btf->strs_deduped = true; /* invalidate raw_data representation */ btf_invalidate_raw_data(btf); + btf->modifiable = true; + return 0; err_out: strset__free(set); - free(hdr); free(types); + free(layout); 
return err; } @@ -1849,6 +2066,7 @@ err_out: int btf__find_str(struct btf *btf, const char *s) { int off; + int err; if (btf->base_btf) { off = btf__find_str(btf->base_btf, s); @@ -1857,8 +2075,9 @@ int btf__find_str(struct btf *btf, const char *s) } /* BTF needs to be in a modifiable state to build string lookup index */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); off = strset__find_str(btf->strs_set, s); if (off < 0) @@ -1875,6 +2094,7 @@ int btf__find_str(struct btf *btf, const char *s) int btf__add_str(struct btf *btf, const char *s) { int off; + int err; if (btf->base_btf) { off = btf__find_str(btf->base_btf, s); @@ -1882,14 +2102,15 @@ int btf__add_str(struct btf *btf, const char *s) return off; } - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); off = strset__add_str(btf->strs_set, s); if (off < 0) return libbpf_err(off); - btf->hdr->str_len = strset__data_size(btf->strs_set); + btf->hdr.str_len = strset__data_size(btf->strs_set); return btf->start_str_off + off; } @@ -1897,7 +2118,7 @@ int btf__add_str(struct btf *btf, const char *s) static void *btf_add_type_mem(struct btf *btf, size_t add_sz) { return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1, - btf->hdr->type_len, UINT_MAX, add_sz); + btf->hdr.type_len, UINT_MAX, add_sz); } static void btf_type_inc_vlen(struct btf_type *t) @@ -1905,16 +2126,31 @@ static void btf_type_inc_vlen(struct btf_type *t) t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t)); } +static void btf_hdr_update_type_len(struct btf *btf, int new_len) +{ + btf->hdr.type_len = new_len; + if (btf->layout) { + btf->hdr.layout_off = btf->hdr.type_off + new_len; + btf->hdr.str_off = btf->hdr.layout_off + btf->hdr.layout_len; + } else { + btf->hdr.str_off = btf->hdr.type_off + new_len; + } +} + +static void btf_hdr_update_str_len(struct 
btf *btf, int new_len) +{ + btf->hdr.str_len = new_len; +} + static int btf_commit_type(struct btf *btf, int data_sz) { int err; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + err = btf_add_type_idx_entry(btf, btf->hdr.type_len); if (err) return libbpf_err(err); - btf->hdr->type_len += data_sz; - btf->hdr->str_off += data_sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + data_sz); btf->nr_types++; return btf->start_id + btf->nr_types - 1; } @@ -1963,13 +2199,14 @@ static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type) __u32 *str_off; int sz, err; - sz = btf_type_size(src_type); + sz = btf_type_size(p->src, src_type); if (sz < 0) return libbpf_err(sz); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(p->dst)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(p->dst); + if (err) + return libbpf_err(err); t = btf_add_type_mem(p->dst, sz); if (!t) @@ -2004,24 +2241,31 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) { struct btf_pipe p = { .src = src_btf, .dst = btf }; int data_sz, sz, cnt, i, err, old_strs_len; + __u32 src_start_id; __u32 *off; void *t; - /* appending split BTF isn't supported yet */ - if (src_btf->base_btf) - return libbpf_err(-ENOTSUP); + /* + * When appending split BTF, the destination must share the same base + * BTF so that base type ID references remain valid. + */ + if (src_btf->base_btf && src_btf->base_btf != btf->base_btf) + return libbpf_err(-EOPNOTSUPP); + + src_start_id = src_btf->base_btf ? 
btf__type_cnt(src_btf->base_btf) : 1; /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); /* remember original strings section size if we have to roll back * partial strings section changes */ - old_strs_len = btf->hdr->str_len; + old_strs_len = btf->hdr.str_len; - data_sz = src_btf->hdr->type_len; - cnt = btf__type_cnt(src_btf) - 1; + data_sz = src_btf->hdr.type_len; + cnt = src_btf->nr_types; /* pre-allocate enough memory for new types */ t = btf_add_type_mem(btf, data_sz); @@ -2045,7 +2289,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) struct btf_field_iter it; __u32 *type_id, *str_off; - sz = btf_type_size(t); + sz = btf_type_size(src_btf, t); if (sz < 0) { /* unlikely, has to be corrupted src_btf */ err = sz; @@ -2060,6 +2304,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) if (err) goto err_out; while ((str_off = btf_field_iter_next(&it))) { + /* don't remap strings from shared base BTF */ + if (*str_off < src_btf->start_str_off) + continue; err = btf_rewrite_str(&p, str_off); if (err) goto err_out; @@ -2074,11 +2321,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) if (!*type_id) /* nothing to do for VOID references */ continue; - /* we haven't updated btf's type count yet, so - * btf->start_id + btf->nr_types - 1 is the type ID offset we should - * add to all newly added BTF types - */ - *type_id += btf->start_id + btf->nr_types - 1; + /* don't remap types from shared base BTF */ + if (*type_id < src_start_id) + continue; + + *type_id += btf->start_id + btf->nr_types - src_start_id; } /* go to next type data and type offset index entry */ @@ -2094,8 +2341,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) * update type count and various internal offsets and sizes to * "commit" the changes and made them visible to the outside world. 
*/ - btf->hdr->type_len += data_sz; - btf->hdr->str_off += data_sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + data_sz); btf->nr_types += cnt; hashmap__free(p.str_off_map); @@ -2106,13 +2352,14 @@ err_out: /* zero out preallocated memory as if it was just allocated with * libbpf_add_mem() */ - memset(btf->types_data + btf->hdr->type_len, 0, data_sz); - memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); + memset(btf->types_data + btf->hdr.type_len, 0, data_sz); + if (btf->strs_data) + memset(btf->strs_data + old_strs_len, 0, btf->hdr.str_len - old_strs_len); /* and now restore original strings section size; types data size * wasn't modified, so doesn't need restoring, see big comment above */ - btf->hdr->str_len = old_strs_len; + btf_hdr_update_str_len(btf, old_strs_len); hashmap__free(p.str_off_map); @@ -2132,6 +2379,7 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding { struct btf_type *t; int sz, name_off; + int err; /* non-empty name */ if (str_is_empty(name)) @@ -2143,8 +2391,9 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding return libbpf_err(-EINVAL); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type) + sizeof(int); t = btf_add_type_mem(btf, sz); @@ -2180,6 +2429,7 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz) { struct btf_type *t; int sz, name_off; + int err; /* non-empty name */ if (str_is_empty(name)) @@ -2190,8 +2440,9 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz) byte_sz != 16) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); @@ -2225,12 +2476,14 @@ static 
int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref { struct btf_type *t; int sz, name_off = 0; + int err; if (validate_type_id(ref_type_id)) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); @@ -2275,13 +2528,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n { struct btf_type *t; struct btf_array *a; + int err; int sz; if (validate_type_id(index_type_id) || validate_type_id(elem_type_id)) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type) + sizeof(struct btf_array); t = btf_add_type_mem(btf, sz); @@ -2305,9 +2560,11 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 { struct btf_type *t; int sz, name_off = 0; + int err; - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); @@ -2387,6 +2644,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, struct btf_member *m; bool is_bitfield; int sz, name_off = 0; + int err; /* last type should be union/struct */ if (btf->nr_types == 0) @@ -2407,8 +2665,9 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_member); m = btf_add_type_mem(btf, sz); @@ -2430,8 +2689,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, /* update parent type's vlen and kflag */ t->info = btf_type_info(btf_kind(t), btf_vlen(t) 
+ 1, is_bitfield || btf_kflag(t)); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } @@ -2440,13 +2698,15 @@ static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz, { struct btf_type *t; int sz, name_off = 0; + int err; /* byte_sz must be power of 2 */ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); @@ -2502,6 +2762,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) struct btf_type *t; struct btf_enum *v; int sz, name_off; + int err; /* last type should be BTF_KIND_ENUM */ if (btf->nr_types == 0) @@ -2517,8 +2778,9 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) return libbpf_err(-E2BIG); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_enum); v = btf_add_type_mem(btf, sz); @@ -2540,8 +2802,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) if (value < 0) t->info = btf_type_info(btf_kind(t), btf_vlen(t), true); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } @@ -2579,6 +2840,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) struct btf_enum64 *v; struct btf_type *t; int sz, name_off; + int err; /* last type should be BTF_KIND_ENUM64 */ if (btf->nr_types == 0) @@ -2592,8 +2854,9 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = 
btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_enum64); v = btf_add_type_mem(btf, sz); @@ -2612,8 +2875,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) t = btf_last_type(btf); btf_type_inc_vlen(t); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } @@ -2782,13 +3044,15 @@ int btf__add_func(struct btf *btf, const char *name, int btf__add_func_proto(struct btf *btf, int ret_type_id) { struct btf_type *t; + int err; int sz; if (validate_type_id(ret_type_id)) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); @@ -2818,6 +3082,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) struct btf_type *t; struct btf_param *p; int sz, name_off = 0; + int err; if (validate_type_id(type_id)) return libbpf_err(-EINVAL); @@ -2830,8 +3095,9 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_param); p = btf_add_type_mem(btf, sz); @@ -2851,8 +3117,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) t = btf_last_type(btf); btf_type_inc_vlen(t); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } @@ -2871,6 +3136,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) struct btf_type *t; struct btf_var *v; int sz, name_off; + int err; /* non-empty name */ if (str_is_empty(name)) @@ -2882,8 +3148,9 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) 
return libbpf_err(-EINVAL); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type) + sizeof(struct btf_var); t = btf_add_type_mem(btf, sz); @@ -2920,13 +3187,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) { struct btf_type *t; int sz, name_off; + int err; /* non-empty name */ if (str_is_empty(name)) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); @@ -2959,6 +3228,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ { struct btf_type *t; struct btf_var_secinfo *v; + int err; int sz; /* last type should be BTF_KIND_DATASEC */ @@ -2972,8 +3242,9 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_var_secinfo); v = btf_add_type_mem(btf, sz); @@ -2988,8 +3259,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ t = btf_last_type(btf); btf_type_inc_vlen(t); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } @@ -2998,6 +3268,7 @@ static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id, { struct btf_type *t; int sz, value_off; + int err; if (str_is_empty(value) || component_idx < -1) return libbpf_err(-EINVAL); @@ -3005,8 +3276,9 @@ static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id, if (validate_type_id(ref_type_id)) return libbpf_err(-EINVAL); - if 
(btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag); t = btf_add_type_mem(btf, sz); @@ -3630,10 +3902,9 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) return libbpf_err(-EINVAL); } - if (btf_ensure_modifiable(btf)) { - err = -ENOMEM; + err = btf_ensure_modifiable(btf); + if (err) goto done; - } err = btf_dedup_prep(d); if (err) { @@ -3953,7 +4224,7 @@ static int btf_dedup_strings(struct btf_dedup *d) /* replace BTF string data and hash with deduped ones */ strset__free(d->btf->strs_set); - d->btf->hdr->str_len = strset__data_size(d->strs_set); + btf_hdr_update_str_len(d->btf, strset__data_size(d->strs_set)); d->btf->strs_set = d->strs_set; d->strs_set = NULL; d->btf->strs_deduped = true; @@ -5386,7 +5657,7 @@ static int btf_dedup_compact_types(struct btf_dedup *d) continue; t = btf__type_by_id(d->btf, id); - len = btf_type_size(t); + len = btf_type_size(d->btf, t); if (len < 0) return len; @@ -5400,14 +5671,17 @@ static int btf_dedup_compact_types(struct btf_dedup *d) /* shrink struct btf's internal types index and update btf_header */ d->btf->nr_types = next_type_id - d->btf->start_id; d->btf->type_offs_cap = d->btf->nr_types; - d->btf->hdr->type_len = p - d->btf->types_data; + d->btf->hdr.type_len = p - d->btf->types_data; new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap, sizeof(*new_offs)); if (d->btf->type_offs_cap && !new_offs) return -ENOMEM; d->btf->type_offs = new_offs; - d->btf->hdr->str_off = d->btf->hdr->type_len; - d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len; + if (d->btf->layout) + d->btf->hdr.layout_off = d->btf->hdr.type_off + d->btf->hdr.type_len; + d->btf->hdr.str_off = d->btf->hdr.type_off + d->btf->hdr.type_len + d->btf->hdr.layout_len; + d->btf->raw_size = d->btf->hdr.hdr_len + d->btf->hdr.type_off + d->btf->hdr.type_len 
+ + d->btf->hdr.layout_len + d->btf->hdr.str_len; return 0; } @@ -5865,7 +6139,7 @@ int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, goto done; } dist.split_start_id = btf__type_cnt(old_base); - dist.split_start_str = old_base->hdr->str_len; + dist.split_start_str = old_base->hdr.str_len; /* Pass over src split BTF; generate the list of base BTF type ids it * references; these will constitute our distilled BTF set to be @@ -5934,14 +6208,14 @@ done: const struct btf_header *btf_header(const struct btf *btf) { - return btf->hdr; + return &btf->hdr; } void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) { btf->base_btf = (struct btf *)base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; + btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off; } int btf__relocate(struct btf *btf, const struct btf *base_btf) @@ -6008,16 +6282,15 @@ int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt, goto done; } - new_types = calloc(btf->hdr->type_len, 1); + new_types = calloc(btf->hdr.type_len, 1); if (!new_types) { err = -ENOMEM; goto done; } - if (btf_ensure_modifiable(btf)) { - err = -ENOMEM; + err = btf_ensure_modifiable(btf); + if (err) goto done; - } for (i = start_offs; i < id_map_cnt; i++) { id = id_map[i]; @@ -6046,7 +6319,7 @@ int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt, id = order_map[i]; t = btf__type_by_id(btf, id); - type_size = btf_type_size(t); + type_size = btf_type_size(btf, t); memcpy(nt, t, type_size); /* fix up referenced IDs for BTF */ @@ -6072,7 +6345,7 @@ int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt, for (nt = new_types, i = 0; i < id_map_cnt - start_offs; i++) { btf->type_offs[i] = nt - new_types; - nt += btf_type_size(nt); + nt += btf_type_size(btf, nt); } free(order_map); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b30008c267c0..a1f8deca2603 100644 --- 
a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -109,6 +109,26 @@ LIBBPF_API struct btf *btf__new_empty(void); */ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); +struct btf_new_opts { + size_t sz; + struct btf *base_btf; /* optional base BTF */ + bool add_layout; /* add BTF layout information */ + size_t:0; +}; +#define btf_new_opts__last_field add_layout + +/** + * @brief **btf__new_empty_opts()** creates an unpopulated BTF object with + * optional *base_btf* and BTF kind layout description if *add_layout* + * is set + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * On error, NULL is returned and the thread-local `errno` variable is + * set to the error code. + */ +LIBBPF_API struct btf *btf__new_empty_opts(struct btf_new_opts *opts); + /** * @brief **btf__distill_base()** creates new versions of the split BTF * *src_btf* and its base BTF. The new base BTF will only contain the types diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 2fa434f09cce..4f19a0d79b0c 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -568,6 +568,53 @@ static int probe_ldimm64_full_range_off(int token_fd) return 1; } +#ifdef __x86_64__ + +#ifndef __NR_uprobe +#define __NR_uprobe 336 +#endif + +static int probe_uprobe_syscall(int token_fd) +{ + /* + * If kernel supports uprobe() syscall, it will return -ENXIO when called + * from the outside of a kernel-generated uprobe trampoline. 
+ */ + return syscall(__NR_uprobe) < 0 && errno == ENXIO; +} +#else +static int probe_uprobe_syscall(int token_fd) +{ + return 0; +} +#endif + +static int probe_kern_btf_layout(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct btf_layout layout[] = { + { 0, 0, 0 }, + { sizeof(__u32), 0, 0 }, + }; + struct btf_header hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(types), + .str_off = sizeof(types) + sizeof(layout), + .str_len = sizeof(strs), + .layout_off = sizeof(types), + .layout_len = sizeof(layout), + }; + + return probe_fd(libbpf__load_raw_btf_hdr(&hdr, (char *)types, strs, + (char *)layout, token_fd)); +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -646,6 +693,12 @@ static struct kern_feature_desc { [FEAT_LDIMM64_FULL_RANGE_OFF] = { "full range LDIMM64 support", probe_ldimm64_full_range_off, }, + [FEAT_UPROBE_SYSCALL] = { + "kernel supports uprobe syscall", probe_uprobe_syscall, + }, + [FEAT_BTF_LAYOUT] = { + "kernel supports BTF layout", probe_kern_btf_layout, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0be7017800fe..3a80a018fc7d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3138,12 +3138,14 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); + bool has_layout = kernel_supports(obj, FEAT_BTF_LAYOUT); return !has_func || !has_datasec || !has_func_global || !has_float || - !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; + !has_decl_tag || !has_type_tag || !has_enum64 || 
!has_qmark_datasec || + !has_layout; } -static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) +struct btf *bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *orig_btf) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); @@ -3153,9 +3155,64 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); + bool has_layout = kernel_supports(obj, FEAT_BTF_LAYOUT); int enum64_placeholder_id = 0; + const struct btf_header *hdr; + struct btf *btf = NULL; + const void *raw_data; struct btf_type *t; int i, j, vlen; + __u32 sz; + int err; + + /* clone BTF to sanitize a copy and leave the original intact */ + raw_data = btf__raw_data(orig_btf, &sz); + if (!raw_data) + return ERR_PTR(-ENOMEM); + /* btf_header() gives us endian-safe header info */ + hdr = btf_header(orig_btf); + + if (!has_layout && hdr->hdr_len >= sizeof(struct btf_header) && + (hdr->layout_len != 0 || hdr->layout_off != 0)) { + const struct btf_header *old_hdr = raw_data; + struct btf_header *new_hdr; + void *new_raw_data; + __u32 new_str_off; + + /* + * Need to rewrite BTF to exclude layout information and + * move string section to immediately after types. 
+ */ + new_raw_data = malloc(sz); + if (!new_raw_data) + return ERR_PTR(-ENOMEM); + + memcpy(new_raw_data, raw_data, sz); + new_hdr = new_raw_data; + new_hdr->layout_off = 0; + new_hdr->layout_len = 0; + new_str_off = hdr->type_off + hdr->type_len; + /* Handle swapped endian case */ + if (old_hdr->magic != hdr->magic) + new_hdr->str_off = bswap_32(new_str_off); + else + new_hdr->str_off = new_str_off; + + memmove(new_raw_data + hdr->hdr_len + new_str_off, + new_raw_data + hdr->hdr_len + hdr->str_off, + hdr->str_len); + sz = hdr->hdr_len + hdr->type_off + hdr->type_len + hdr->str_len; + btf = btf__new(new_raw_data, sz); + free(new_raw_data); + } else { + btf = btf__new(raw_data, sz); + } + err = libbpf_get_error(btf); + if (err) + return ERR_PTR(err); + + /* enforce 8-byte pointers for BPF-targeted BTFs */ + btf__set_pointer_size(btf, 8); for (i = 1; i < btf__type_cnt(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); @@ -3233,9 +3290,10 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) if (enum64_placeholder_id == 0) { enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); - if (enum64_placeholder_id < 0) - return enum64_placeholder_id; - + if (enum64_placeholder_id < 0) { + btf__free(btf); + return ERR_PTR(enum64_placeholder_id); + } t = (struct btf_type *)btf__type_by_id(btf, i); } @@ -3249,7 +3307,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) } } - return 0; + return btf; } static bool libbpf_needs_btf(const struct bpf_object *obj) @@ -3600,21 +3658,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) sanitize = btf_needs_sanitization(obj); if (sanitize) { - const void *raw_data; - __u32 sz; - - /* clone BTF to sanitize a copy and leave the original intact */ - raw_data = btf__raw_data(obj->btf, &sz); - kern_btf = btf__new(raw_data, sz); - err = libbpf_get_error(kern_btf); - if (err) - return err; - - /* enforce 8-byte pointers for BPF-targeted BTFs 
*/ - btf__set_pointer_size(obj->btf, 8); - err = bpf_object__sanitize_btf(obj, kern_btf); - if (err) - return err; + kern_btf = bpf_object__sanitize_btf(obj, obj->btf); + if (IS_ERR(kern_btf)) + return PTR_ERR(kern_btf); } if (obj->gen_loader) { @@ -5157,12 +5203,20 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) */ return true; - if (obj->token_fd) + if (obj->feat_cache) return feat_supported(obj->feat_cache, feat_id); return feat_supported(NULL, feat_id); } +/* Used in testing to simulate missing features. */ +void bpf_object_set_feat_cache(struct bpf_object *obj, struct kern_feature_cache *cache) +{ + if (obj->feat_cache) + free(obj->feat_cache); + obj->feat_cache = cache; +} + static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { struct bpf_map_info map_info; @@ -5798,11 +5852,12 @@ static int load_module_btfs(struct bpf_object *obj) info.name = ptr_to_u64(name); info.name_len = sizeof(name); + btf = NULL; err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { err = -errno; pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); - goto err_out; + break; } /* ignore non-module BTFs */ @@ -5816,15 +5871,15 @@ static int load_module_btfs(struct bpf_object *obj) if (err) { pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", name, id, errstr(err)); - goto err_out; + break; } err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); if (err) - goto err_out; + break; - mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; + mod_btf = &obj->btf_modules[obj->btf_module_cnt]; mod_btf->btf = btf; mod_btf->id = id; @@ -5832,16 +5887,16 @@ static int load_module_btfs(struct bpf_object *obj) mod_btf->name = strdup(name); if (!mod_btf->name) { err = -ENOMEM; - goto err_out; + break; } - continue; + obj->btf_module_cnt++; + } -err_out: + if (err) { + btf__free(btf); close(fd); - return err; } - - return 0; + return err; } static 
struct bpf_core_cand_list * @@ -9802,6 +9857,111 @@ __u32 bpf_program__line_info_cnt(const struct bpf_program *prog) return prog->line_info_cnt; } +int bpf_program__clone(struct bpf_program *prog, const struct bpf_prog_load_opts *opts) +{ + LIBBPF_OPTS(bpf_prog_load_opts, attr); + struct bpf_object *obj; + const void *info; + __u32 info_cnt, info_rec_size; + int err, fd, prog_btf_fd; + + if (!prog) + return libbpf_err(-EINVAL); + + if (!OPTS_VALID(opts, bpf_prog_load_opts)) + return libbpf_err(-EINVAL); + + obj = prog->obj; + if (obj->state < OBJ_PREPARED) + return libbpf_err(-EINVAL); + + /* + * Caller-provided opts take priority; fall back to + * prog/object defaults when the caller leaves them zero. + */ + attr.attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0) ?: prog->attach_prog_fd; + attr.prog_flags = OPTS_GET(opts, prog_flags, 0) ?: prog->prog_flags; + attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0) ?: prog->prog_ifindex; + attr.kern_version = OPTS_GET(opts, kern_version, 0) ?: obj->kern_version; + attr.fd_array = OPTS_GET(opts, fd_array, NULL) ?: obj->fd_array; + attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0) ?: obj->fd_array_cnt; + attr.token_fd = OPTS_GET(opts, token_fd, 0) ?: obj->token_fd; + if (attr.token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; + + prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0); + if (!prog_btf_fd && obj->btf) + prog_btf_fd = btf__fd(obj->btf); + + /* BTF func/line info: only pass if kernel supports it */ + if (kernel_supports(obj, FEAT_BTF_FUNC) && prog_btf_fd > 0) { + attr.prog_btf_fd = prog_btf_fd; + + /* func_info/line_info triples: all-or-nothing from caller */ + info = OPTS_GET(opts, func_info, NULL); + info_cnt = OPTS_GET(opts, func_info_cnt, 0); + info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); + if (!!info != !!info_cnt || !!info != !!info_rec_size) { + pr_warn("prog '%s': func_info, func_info_cnt, and func_info_rec_size must all be specified or all omitted\n", + prog->name); + return libbpf_err(-EINVAL); + } + 
attr.func_info = info ?: prog->func_info; + attr.func_info_cnt = info ? info_cnt : prog->func_info_cnt; + attr.func_info_rec_size = info ? info_rec_size : prog->func_info_rec_size; + + info = OPTS_GET(opts, line_info, NULL); + info_cnt = OPTS_GET(opts, line_info_cnt, 0); + info_rec_size = OPTS_GET(opts, line_info_rec_size, 0); + if (!!info != !!info_cnt || !!info != !!info_rec_size) { + pr_warn("prog '%s': line_info, line_info_cnt, and line_info_rec_size must all be specified or all omitted\n", + prog->name); + return libbpf_err(-EINVAL); + } + attr.line_info = info ?: prog->line_info; + attr.line_info_cnt = info ? info_cnt : prog->line_info_cnt; + attr.line_info_rec_size = info ? info_rec_size : prog->line_info_rec_size; + } + + /* Logging is caller-controlled; no fallback to prog/obj log settings */ + attr.log_buf = OPTS_GET(opts, log_buf, NULL); + attr.log_size = OPTS_GET(opts, log_size, 0); + attr.log_level = OPTS_GET(opts, log_level, 0); + + /* + * Fields below may be mutated by prog_prepare_load_fn: + * Seed them from prog/obj defaults here; + * Later override with caller-provided opts. + */ + attr.expected_attach_type = prog->expected_attach_type; + attr.attach_btf_id = prog->attach_btf_id; + attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; + + if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { + err = prog->sec_def->prog_prepare_load_fn(prog, &attr, prog->sec_def->cookie); + if (err) + return libbpf_err(err); + } + + /* Re-apply caller overrides for output fields */ + if (OPTS_GET(opts, expected_attach_type, 0)) + attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); + if (OPTS_GET(opts, attach_btf_id, 0)) + attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0); + if (OPTS_GET(opts, attach_btf_obj_fd, 0)) + attr.attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0); + + /* + * Unlike bpf_object_load_prog(), we intentionally do not call bpf_prog_bind_map() + * for RODATA maps here to avoid mutating the object's state. 
Callers can bind the + * required maps themselves using bpf_prog_bind_map(). + */ + fd = bpf_prog_load(prog->type, prog->name, obj->license, prog->insns, prog->insns_cnt, + &attr); + + return libbpf_err(fd); +} + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ .sec = (char *)sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ @@ -11692,6 +11852,8 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, default: return libbpf_err_ptr(-EINVAL); } + if (!func_name && legacy) + return libbpf_err_ptr(-EOPNOTSUPP); if (!legacy) { pfd = perf_event_open_probe(false /* uprobe */, retprobe, @@ -11711,21 +11873,21 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, offset, -1 /* pid */); } if (pfd < 0) { - err = -errno; - pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", + err = pfd; + pr_warn("prog '%s': failed to create %s '%s%s0x%zx' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", - func_name, offset, - errstr(err)); + func_name ?: "", func_name ? "+" : "", + offset, errstr(err)); goto err_out; } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); err = libbpf_get_error(link); if (err) { close(pfd); - pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", + pr_warn("prog '%s': failed to attach to %s '%s%s0x%zx': %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", - func_name, offset, - errstr(err)); + func_name ?: "", func_name ? "+" : "", + offset, errstr(err)); goto err_clean_legacy; } if (legacy) { @@ -12041,7 +12203,16 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, if (addrs && syms) return libbpf_err_ptr(-EINVAL); - if (pattern) { + /* + * Exact function name (no wildcards) without unique_match: + * bypass kallsyms parsing and pass the symbol directly to the + * kernel via syms[] array. When unique_match is set, fall + * through to the slow path which detects duplicate symbols. 
+ */ + if (pattern && !strpbrk(pattern, "*?") && !unique_match) { + syms = &pattern; + cnt = 1; + } else if (pattern) { if (has_available_filter_functions_addrs()) err = libbpf_available_kprobes_parse(&res); else @@ -12084,6 +12255,14 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); if (link_fd < 0) { err = -errno; + /* + * Normalize error code: when exact name bypasses kallsyms + * parsing, kernel returns ESRCH from ftrace_lookup_symbols(). + * Convert to ENOENT for API consistency with the pattern + * matching path which returns ENOENT from userspace. + */ + if (err == -ESRCH) + err = -ENOENT; pr_warn("prog '%s': failed to attach: %s\n", prog->name, errstr(err)); goto error; @@ -12684,7 +12863,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, binary_path, func_offset, pid); } if (pfd < 0) { - err = -errno; + err = pfd; pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index dfc37a615578..bba4e8464396 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -557,7 +557,7 @@ struct bpf_kprobe_opts { size_t sz; /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; - /* function's offset to install kprobe to */ + /* function offset, or raw address if func_name == NULL */ size_t offset; /* kprobe is return probe */ bool retprobe; @@ -565,11 +565,36 @@ struct bpf_kprobe_opts { enum probe_attach_mode attach_mode; size_t :0; }; + #define bpf_kprobe_opts__last_field attach_mode +/** + * @brief **bpf_program__attach_kprobe()** attaches a BPF program to a + * kernel function entry or return. 
+ * + * @param prog BPF program to attach + * @param retprobe Attach to function return + * @param func_name Name of the kernel function to attach to + * @return Reference to the newly created BPF link; or NULL is returned on + * error, error code is stored in errno + */ LIBBPF_API struct bpf_link * bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, const char *func_name); + +/** + * @brief **bpf_program__attach_kprobe_opts()** is just like + * bpf_program__attach_kprobe() except with an options struct + * for various configurations. + * + * @param prog BPF program to attach + * @param func_name Name of the kernel function to attach to. If NULL, + * opts->offset is treated as a raw kernel address. Raw-address attach + * is supported with PROBE_ATTACH_MODE_PERF and PROBE_ATTACH_MODE_LINK. + * @param opts Options for altering program attachment + * @return Reference to the newly created BPF link; or NULL is returned on + * error, error code is stored in errno + */ LIBBPF_API struct bpf_link * bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const char *func_name, @@ -2021,6 +2046,23 @@ LIBBPF_API int libbpf_register_prog_handler(const char *sec, */ LIBBPF_API int libbpf_unregister_prog_handler(int handler_id); +/** + * @brief **bpf_program__clone()** loads a single BPF program from a prepared + * BPF object into the kernel, returning its file descriptor. + * + * The BPF object must have been previously prepared with + * **bpf_object__prepare()**. If @opts is provided, any non-zero field + * overrides the defaults derived from the program/object internals. + * If @opts is NULL, all fields are populated automatically. + * + * The returned FD is owned by the caller and must be closed with close(). 
+ * + * @param prog BPF program from a prepared object + * @param opts Optional load options; non-zero fields override defaults + * @return program FD (>= 0) on success; negative error code on failure + */ +LIBBPF_API int bpf_program__clone(struct bpf_program *prog, const struct bpf_prog_load_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index d18fbcea7578..dfed8d60af05 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -455,3 +455,9 @@ LIBBPF_1.7.0 { bpf_program__assoc_struct_ops; btf__permute; } LIBBPF_1.6.0; + +LIBBPF_1.8.0 { + global: + bpf_program__clone; + btf__new_empty_opts; +} LIBBPF_1.7.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 974147e8a8aa..3781c45b46d3 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -394,6 +394,10 @@ enum kern_feature_id { FEAT_BTF_QMARK_DATASEC, /* Kernel supports LDIMM64 imm offsets past 512 MiB. 
*/ FEAT_LDIMM64_FULL_RANGE_OFF, + /* Kernel supports uprobe syscall */ + FEAT_UPROBE_SYSCALL, + /* Kernel supports BTF layout information */ + FEAT_BTF_LAYOUT, __FEAT_CNT, }; @@ -410,6 +414,7 @@ struct kern_feature_cache { bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +void bpf_object_set_feat_cache(struct bpf_object *obj, struct kern_feature_cache *cache); int probe_kern_syscall_wrapper(int token_fd); int probe_memcg_account(int token_fd); @@ -420,6 +425,10 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len, int token_fd); +int libbpf__load_raw_btf_hdr(const struct btf_header *hdr, + const char *raw_types, const char *str_sec, + const char *layout_sec, int token_fd); +struct btf *bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *orig_btf); int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level, int token_fd); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index bccf4bb747e1..b70d9637ecf5 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -218,18 +218,10 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) return libbpf_err(ret); } -int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len, - int token_fd) +int libbpf__load_raw_btf_hdr(const struct btf_header *hdr, const char *raw_types, + const char *str_sec, const char *layout_sec, + int token_fd) { - struct btf_header hdr = { - .magic = BTF_MAGIC, - .version = BTF_VERSION, - .hdr_len = sizeof(struct btf_header), - .type_len = types_len, - .str_off = types_len, - .str_len = str_len, - }; LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd, .btf_flags = token_fd ? 
BPF_F_TOKEN_FD : 0, @@ -237,14 +229,16 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, int btf_fd, btf_len; __u8 *raw_btf; - btf_len = hdr.hdr_len + hdr.type_len + hdr.str_len; + btf_len = hdr->hdr_len + hdr->type_off + hdr->type_len + hdr->str_len + hdr->layout_len; raw_btf = malloc(btf_len); if (!raw_btf) return -ENOMEM; - memcpy(raw_btf, &hdr, sizeof(hdr)); - memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); - memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); + memcpy(raw_btf, hdr, sizeof(*hdr)); + memcpy(raw_btf + hdr->hdr_len + hdr->type_off, raw_types, hdr->type_len); + memcpy(raw_btf + hdr->hdr_len + hdr->str_off, str_sec, hdr->str_len); + if (layout_sec) + memcpy(raw_btf + hdr->hdr_len + hdr->layout_off, layout_sec, hdr->layout_len); btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); @@ -252,6 +246,22 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, return btf_fd; } +int libbpf__load_raw_btf(const char *raw_types, size_t types_len, + const char *str_sec, size_t str_len, + int token_fd) +{ + struct btf_header hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = types_len, + .str_off = types_len, + .str_len = str_len, + }; + + return libbpf__load_raw_btf_hdr(&hdr, raw_types, str_sec, NULL, token_fd); +} + static int load_local_storage_btf(void) { const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l"; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 99331e317dee..c446c0cd8cf9 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 7 +#define LIBBPF_MINOR_VERSION 8 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 6eea5edba58a..0ccc8f548cba 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -292,6 +292,8 @@ int 
bpf_core_parse_spec(const char *prog_name, const struct btf *btf, ++spec_str; if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) return -EINVAL; + if (access_idx < 0) + return -EINVAL; if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) return -E2BIG; spec_str += parsed_len; diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index d1524f6f54ae..e3710933fd52 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -262,6 +262,7 @@ struct usdt_manager { bool has_bpf_cookie; bool has_sema_refcnt; bool has_uprobe_multi; + bool has_uprobe_syscall; }; struct usdt_manager *usdt_manager_new(struct bpf_object *obj) @@ -301,6 +302,13 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj) * usdt probes. */ man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK); + + /* + * Detect kernel support for uprobe() syscall, it's presence means we can + * take advantage of faster nop5 uprobe handling. + * Added in: 56101b69c919 ("uprobes/x86: Add uprobe syscall to speed up uprobe") + */ + man->has_uprobe_syscall = kernel_supports(obj, FEAT_UPROBE_SYSCALL); return man; } @@ -585,13 +593,34 @@ static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); -static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, - const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, - struct usdt_target **out_targets, size_t *out_target_cnt) +#if defined(__x86_64__) +static bool has_nop_combo(int fd, long off) +{ + unsigned char nop_combo[6] = { + 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */ + }; + unsigned char buf[6]; + + if (pread(fd, buf, 6, off) != 6) + return false; + return memcmp(buf, nop_combo, 6) == 0; +} +#else +static bool has_nop_combo(int fd, long off) +{ + return false; +} +#endif + +static int collect_usdt_targets(struct usdt_manager *man, struct elf_fd *elf_fd, const char 
*path, + pid_t pid, const char *usdt_provider, const char *usdt_name, + __u64 usdt_cookie, struct usdt_target **out_targets, + size_t *out_target_cnt) { size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0; struct elf_seg *segs = NULL, *vma_segs = NULL; struct usdt_target *targets = NULL, *target; + Elf *elf = elf_fd->elf; long base_addr = 0; Elf_Scn *notes_scn, *base_scn; GElf_Shdr base_shdr, notes_shdr; @@ -784,6 +813,16 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * target = &targets[target_cnt]; memset(target, 0, sizeof(*target)); + /* + * We have uprobe syscall and usdt with nop,nop5 instructions combo, + * so we can place the uprobe directly on nop5 (+1) and get this probe + * optimized. + */ + if (man->has_uprobe_syscall && has_nop_combo(elf_fd->fd, usdt_rel_ip)) { + usdt_abs_ip++; + usdt_rel_ip++; + } + target->abs_ip = usdt_abs_ip; target->rel_ip = usdt_rel_ip; target->sema_off = usdt_sema_off; @@ -998,7 +1037,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct /* discover USDT in given binary, optionally limiting * activations to a given PID, if pid > 0 */ - err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name, + err = collect_usdt_targets(man, &elf_fd, path, pid, usdt_provider, usdt_name, usdt_cookie, &targets, &target_cnt); if (err <= 0) { err = (err == 0) ? 
-ENOENT : err; diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 4160e7d2e120..e51b0490ad57 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -15,12 +15,12 @@ #define MAX_NR_CPUS 4096 -void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) +void perf_cpu_map__set_nr(struct perf_cpu_map *map, unsigned int nr_cpus) { RC_CHK_ACCESS(map)->nr = nr_cpus; } -struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) +struct perf_cpu_map *perf_cpu_map__alloc(unsigned int nr_cpus) { RC_STRUCT(perf_cpu_map) *cpus; struct perf_cpu_map *result; @@ -78,7 +78,7 @@ void perf_cpu_map__put(struct perf_cpu_map *map) static struct perf_cpu_map *cpu_map__new_sysconf(void) { struct perf_cpu_map *cpus; - int nr_cpus, nr_cpus_conf; + long nr_cpus, nr_cpus_conf; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (nr_cpus < 0) @@ -86,15 +86,13 @@ static struct perf_cpu_map *cpu_map__new_sysconf(void) nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF); if (nr_cpus != nr_cpus_conf) { - pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.", + pr_warning("Number of online CPUs (%ld) differs from the number configured (%ld) the CPU map will only cover the first %ld CPUs.", nr_cpus, nr_cpus_conf, nr_cpus); } cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { - int i; - - for (i = 0; i < nr_cpus; ++i) + for (long i = 0; i < nr_cpus; ++i) RC_CHK_ACCESS(cpus)->map[i].cpu = i; } @@ -132,23 +130,23 @@ static int cmp_cpu(const void *a, const void *b) return cpu_a->cpu - cpu_b->cpu; } -static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, unsigned int idx) { return RC_CHK_ACCESS(cpus)->map[idx]; } -static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus) +static struct perf_cpu_map *cpu_map__trim_new(unsigned int nr_cpus, const struct perf_cpu *tmp_cpus) 
{ size_t payload_size = nr_cpus * sizeof(struct perf_cpu); struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus); - int i, j; if (cpus != NULL) { + unsigned int j = 0; + memcpy(RC_CHK_ACCESS(cpus)->map, tmp_cpus, payload_size); qsort(RC_CHK_ACCESS(cpus)->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); /* Remove dups */ - j = 0; - for (i = 0; i < nr_cpus; i++) { + for (unsigned int i = 0; i < nr_cpus; i++) { if (i == 0 || __perf_cpu_map__cpu(cpus, i).cpu != __perf_cpu_map__cpu(cpus, i - 1).cpu) { @@ -167,9 +165,8 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) struct perf_cpu_map *cpus = NULL; unsigned long start_cpu, end_cpu = 0; char *p = NULL; - int i, nr_cpus = 0; + unsigned int nr_cpus = 0, max_entries = 0; struct perf_cpu *tmp_cpus = NULL, *tmp; - int max_entries = 0; if (!cpu_list) return perf_cpu_map__new_online_cpus(); @@ -208,9 +205,10 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) for (; start_cpu <= end_cpu; start_cpu++) { /* check for duplicates */ - for (i = 0; i < nr_cpus; i++) + for (unsigned int i = 0; i < nr_cpus; i++) { if (tmp_cpus[i].cpu == (int16_t)start_cpu) goto invalid; + } if (nr_cpus == max_entries) { max_entries += max(end_cpu - start_cpu + 1, 16UL); @@ -252,12 +250,12 @@ struct perf_cpu_map *perf_cpu_map__new_int(int cpu) return cpus; } -static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus) +static unsigned int __perf_cpu_map__nr(const struct perf_cpu_map *cpus) { return RC_CHK_ACCESS(cpus)->nr; } -struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, unsigned int idx) { struct perf_cpu result = { .cpu = -1 @@ -269,7 +267,7 @@ struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) return result; } -int perf_cpu_map__nr(const struct perf_cpu_map *cpus) +unsigned int perf_cpu_map__nr(const struct perf_cpu_map *cpus) { return cpus ? 
__perf_cpu_map__nr(cpus) : 1; } @@ -294,7 +292,7 @@ bool perf_cpu_map__is_empty(const struct perf_cpu_map *map) int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) { - int low, high; + unsigned int low, high; if (!cpus) return -1; @@ -324,7 +322,7 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs) { - int nr; + unsigned int nr; if (lhs == rhs) return true; @@ -336,7 +334,7 @@ bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_m if (nr != __perf_cpu_map__nr(rhs)) return false; - for (int idx = 0; idx < nr; idx++) { + for (unsigned int idx = 0; idx < nr; idx++) { if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu) return false; } @@ -353,7 +351,7 @@ struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map) struct perf_cpu cpu, result = { .cpu = -1 }; - int idx; + unsigned int idx; perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) { result = cpu; @@ -384,7 +382,7 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a)) return false; - for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) { + for (unsigned int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) { if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu) return false; if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) { @@ -410,8 +408,7 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other) { struct perf_cpu *tmp_cpus; - int tmp_len; - int i, j, k; + unsigned int tmp_len, i, j, k; struct perf_cpu_map *merged; if (perf_cpu_map__is_subset(*orig, other)) @@ -455,7 +452,7 @@ int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other) struct perf_cpu_map 
*perf_cpu_map__intersect(struct perf_cpu_map *orig, struct perf_cpu_map *other) { - int i, j, k; + unsigned int i, j, k; struct perf_cpu_map *merged; if (perf_cpu_map__is_subset(other, orig)) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 13a307fc75ae..f747c0bc692d 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -127,7 +127,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { struct perf_cpu cpu; - int idx, thread, err = 0; + unsigned int idx; + int thread, err = 0; if (cpus == NULL) { static struct perf_cpu_map *empty_cpu_map; @@ -460,7 +461,7 @@ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread) { struct perf_cpu cpu __maybe_unused; - int idx; + unsigned int idx; int err; perf_cpu_map__for_each_cpu(cpu, idx, evsel->cpus) { @@ -499,12 +500,13 @@ int perf_evsel__disable(struct perf_evsel *evsel) int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter) { - int err = 0, i; + int err = 0; - for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++) + for (unsigned int i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++) { err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_SET_FILTER, (void *)filter, i); + } return err; } diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index e2be2d17c32b..c19678188b17 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -16,16 +16,16 @@ DECLARE_RC_STRUCT(perf_cpu_map) { refcount_t refcnt; /** Length of the map array. */ - int nr; + unsigned int nr; /** The CPU values. 
*/ struct perf_cpu map[]; }; -struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus); +struct perf_cpu_map *perf_cpu_map__alloc(unsigned int nr_cpus); int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b); -void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus); +void perf_cpu_map__set_nr(struct perf_cpu_map *map, unsigned int nr_cpus); static inline refcount_t *perf_cpu_map__refcnt(struct perf_cpu_map *map) { diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 58cc5c5fa47c..a1dd25db65b6 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -49,7 +49,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index * is invalid. */ -LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, unsigned int idx); /** * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a * cpu of -1 for an invalid index, this makes an empty map @@ -57,7 +57,7 @@ LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, i * the result is the number CPUs in the map plus one if the * "any CPU"/dummy value is present. */ -LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); +LIBPERF_API unsigned int perf_cpu_map__nr(const struct perf_cpu_map *cpus); /** * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value. */ diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py index aba22c33393d..0b1ffdf9f7a3 100755 --- a/tools/lib/python/jobserver.py +++ b/tools/lib/python/jobserver.py @@ -8,14 +8,14 @@ """ Interacts with the POSIX jobserver during the Kernel build time. 
-A "normal" jobserver task, like the one initiated by a make subrocess would do: +A "normal" jobserver task, like the one initiated by a make subprocess would do: - open read/write file descriptors to communicate with the job server; - ask for one slot by calling:: claim = os.read(reader, 1) - - when the job finshes, call:: + - when the job finishes, call:: os.write(writer, b"+") # os.write(writer, claim) diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py new file mode 100644 index 000000000000..cb95f5172448 --- /dev/null +++ b/tools/lib/python/kdoc/c_lex.py @@ -0,0 +1,662 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. + +""" +Regular expression ancillary classes. + +Those help caching regular expressions and do matching for kernel-doc. + +Please notice that the code here may rise exceptions to indicate bad +usage inside kdoc to indicate problems at the replace pattern. + +Other errors are logged via log instance. +""" + +import logging +import re + +from copy import copy + +from .kdoc_re import KernRe + +log = logging.getLogger(__name__) + +def tokenizer_set_log(logger, prefix = ""): + """ + Replace the module‑level logger with a LoggerAdapter that + prepends *prefix* to every message. + """ + global log + + class PrefixAdapter(logging.LoggerAdapter): + """ + Ancillary class to set prefix on all message logs. + """ + def process(self, msg, kwargs): + return f"{prefix}{msg}", kwargs + + # Wrap the provided logger in our adapter + log = PrefixAdapter(logger, {"prefix": prefix}) + +class CToken(): + """ + Data class to define a C token. + """ + + # Tokens that can be used by the parser. Works like an C enum. + + COMMENT = 0 #: A standard C or C99 comment, including delimiter. + STRING = 1 #: A string, including quotation marks. + CHAR = 2 #: A character, including apostophes. + NUMBER = 3 #: A number. + PUNC = 4 #: A puntuation mark: / ``,`` / ``.``. 
+ BEGIN = 5 #: A begin character: ``{`` / ``[`` / ``(``. + END = 6 #: A end character: ``}`` / ``]`` / ``)``. + CPP = 7 #: A preprocessor macro. + HASH = 8 #: The hash character - useful to handle other macros. + OP = 9 #: A C operator (add, subtract, ...). + STRUCT = 10 #: A ``struct`` keyword. + UNION = 11 #: An ``union`` keyword. + ENUM = 12 #: A ``struct`` keyword. + TYPEDEF = 13 #: A ``typedef`` keyword. + NAME = 14 #: A name. Can be an ID or a type. + SPACE = 15 #: Any space characters, including new lines + ENDSTMT = 16 #: End of an statement (``;``). + + BACKREF = 17 #: Not a valid C sequence, but used at sub regex patterns. + + MISMATCH = 255 #: an error indicator: should never happen in practice. + + # Dict to convert from an enum interger into a string. + _name_by_val = {v: k for k, v in dict(vars()).items() if isinstance(v, int)} + + # Dict to convert from string to an enum-like integer value. + _name_to_val = {k: v for v, k in _name_by_val.items()} + + @staticmethod + def to_name(val): + """Convert from an integer value from CToken enum into a string""" + + return CToken._name_by_val.get(val, f"UNKNOWN({val})") + + @staticmethod + def from_name(name): + """Convert a string into a CToken enum value""" + if name in CToken._name_to_val: + return CToken._name_to_val[name] + + return CToken.MISMATCH + + + def __init__(self, kind, value=None, pos=0, + brace_level=0, paren_level=0, bracket_level=0): + self.kind = kind + self.value = value + self.pos = pos + self.level = (bracket_level, paren_level, brace_level) + + def __repr__(self): + name = self.to_name(self.kind) + if isinstance(self.value, str): + value = '"' + self.value + '"' + else: + value = self.value + + return f"CToken(CToken.{name}, {value}, {self.pos}, {self.level})" + +#: Regexes to parse C code, transforming it into tokens. 
+RE_SCANNER_LIST = [ + # + # Note that \s\S is different than .*, as it also catches \n + # + (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"), + + (CToken.STRING, r'"(?:\\.|[^"\\])*"'), + (CToken.CHAR, r"'(?:\\.|[^'\\])'"), + + (CToken.NUMBER, r"0[xX][\da-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|" + r"\d+(?:\.\d*)?(?:[eE][+-]?\d+)?[fFlL]*"), + + (CToken.ENDSTMT, r"(?:\s+;|;)"), + + (CToken.PUNC, r"[,\.]"), + + (CToken.BEGIN, r"[\[\(\{]"), + + (CToken.END, r"[\]\)\}]"), + + (CToken.CPP, r"#\s*(?:define|include|ifdef|ifndef|if|else|elif|endif|undef|pragma)\b"), + + (CToken.HASH, r"#"), + + (CToken.OP, r"\+\+|\-\-|\->|==|\!=|<=|>=|&&|\|\||<<|>>|\+=|\-=|\*=|/=|%=" + r"|&=|\|=|\^=|[=\+\-\*/%<>&\|\^~!\?\:]"), + + (CToken.STRUCT, r"\bstruct\b"), + (CToken.UNION, r"\bunion\b"), + (CToken.ENUM, r"\benum\b"), + (CToken.TYPEDEF, r"\btypedef\b"), + + (CToken.NAME, r"[A-Za-z_]\w*"), + + (CToken.SPACE, r"\s+"), + + (CToken.BACKREF, r"\\\d+"), + + (CToken.MISMATCH,r"."), +] + +def fill_re_scanner(token_list): + """Ancillary routine to convert RE_SCANNER_LIST into a finditer regex""" + re_tokens = [] + + for kind, pattern in token_list: + name = CToken.to_name(kind) + re_tokens.append(f"(?P<{name}>{pattern})") + + return KernRe("|".join(re_tokens), re.MULTILINE | re.DOTALL) + +#: Handle C continuation lines. +RE_CONT = KernRe(r"\\\n") + +RE_COMMENT_START = KernRe(r'/\*\s*') + +#: tokenizer regex. Will be filled at the first CTokenizer usage. +RE_SCANNER = fill_re_scanner(RE_SCANNER_LIST) + + +class CTokenizer(): + """ + Scan C statements and definitions and produce tokens. + + When converted to string, it drops comments and handle public/private + values, respecting depth. + """ + + # This class is inspired and follows the basic concepts of: + # https://docs.python.org/3/library/re.html#writing-a-tokenizer + + def __init__(self, source=None): + """ + Create a regular expression to handle RE_SCANNER_LIST. + + While I generally don't like using regex group naming via: + (?P<name>...) 
+ + in this particular case, it makes sense, as we can pick the name + when matching a code via RE_SCANNER. + """ + + # + # Store logger to allow parser classes to re-use it + # + global log + self.log = log + + self.tokens = [] + + if not source: + return + + if isinstance(source, list): + self.tokens = source + return + + # + # While we could just use _tokenize directly via interator, + # As we'll need to use the tokenizer several times inside kernel-doc + # to handle macro transforms, cache the results on a list, as + # re-using it is cheaper than having to parse everytime. + # + for tok in self._tokenize(source): + self.tokens.append(tok) + + def _tokenize(self, source): + """ + Iterator that parses ``source``, splitting it into tokens, as defined + at ``self.RE_SCANNER_LIST``. + + The interactor returns a CToken class object. + """ + + # Handle continuation lines. Note that kdoc_parser already has a + # logic to do that. Still, let's keep it for completeness, as we might + # end re-using this tokenizer outsize kernel-doc some day - or we may + # eventually remove from there as a future cleanup. 
+ source = RE_CONT.sub("", source) + + brace_level = 0 + paren_level = 0 + bracket_level = 0 + + for match in RE_SCANNER.finditer(source): + kind = CToken.from_name(match.lastgroup) + pos = match.start() + value = match.group() + + if kind == CToken.MISMATCH: + log.error(f"Unexpected token '{value}' on pos {pos}:\n\t'{source}'") + elif kind == CToken.BEGIN: + if value == '(': + paren_level += 1 + elif value == '[': + bracket_level += 1 + else: # value == '{' + brace_level += 1 + + elif kind == CToken.END: + if value == ')' and paren_level > 0: + paren_level -= 1 + elif value == ']' and bracket_level > 0: + bracket_level -= 1 + elif brace_level > 0: # value == '}' + brace_level -= 1 + + yield CToken(kind, value, pos, + brace_level, paren_level, bracket_level) + + def __str__(self): + out="" + show_stack = [True] + + for i, tok in enumerate(self.tokens): + if tok.kind == CToken.BEGIN: + show_stack.append(show_stack[-1]) + + elif tok.kind == CToken.END: + prev = show_stack[-1] + if len(show_stack) > 1: + show_stack.pop() + + if not prev and show_stack[-1]: + # + # Try to preserve indent + # + out += "\t" * (len(show_stack) - 1) + + out += str(tok.value) + continue + + elif tok.kind == CToken.COMMENT: + comment = RE_COMMENT_START.sub("", tok.value) + + if comment.startswith("private:"): + show_stack[-1] = False + show = False + elif comment.startswith("public:"): + show_stack[-1] = True + + continue + + if not show_stack[-1]: + continue + + if i < len(self.tokens) - 1: + next_tok = self.tokens[i + 1] + + # Do some cleanups before ";" + + if tok.kind == CToken.SPACE and next_tok.kind == CToken.ENDSTMT: + continue + + if tok.kind == CToken.ENDSTMT and next_tok.kind == tok.kind: + continue + + out += str(tok.value) + + return out + + +class CTokenArgs: + """ + Ancillary class to help using backrefs from sub matches. + + If the highest backref contain a "+" at the last element, + the logic will be greedy, picking all other delims. 
+ + This is needed to parse struct_group macros with end with ``MEMBERS...``. + """ + def __init__(self, sub_str): + self.sub_groups = set() + self.max_group = -1 + self.greedy = None + + for m in KernRe(r'\\(\d+)([+]?)').finditer(sub_str): + group = int(m.group(1)) + if m.group(2) == "+": + if self.greedy and self.greedy != group: + raise ValueError("There are multiple greedy patterns!") + self.greedy = group + + self.sub_groups.add(group) + self.max_group = max(self.max_group, group) + + if self.greedy: + if self.greedy != self.max_group: + raise ValueError("Greedy pattern is not the last one!") + + sub_str = KernRe(r'(\\\d+)[+]').sub(r"\1", sub_str) + + self.sub_str = sub_str + self.sub_tokeninzer = CTokenizer(sub_str) + + def groups(self, new_tokenizer): + r""" + Create replacement arguments for backrefs like: + + ``\0``, ``\1``, ``\2``, ... ``\{number}`` + + It also accepts a ``+`` character to the highest backref, like + ``\4+``. When used, the backref will be greedy, picking all other + arguments afterwards. + + The logic is smart enough to only go up to the maximum required + argument, even if there are more. + + If there is a backref for an argument above the limit, it will + raise an exception. Please notice that, on C, square brackets + don't have any separator on it. Trying to use ``\1``..``\n`` for + brackets also raise an exception. 
+ """ + + level = (0, 0, 0) + + if self.max_group < 0: + return level, [] + + tokens = new_tokenizer.tokens + + # + # Fill \0 with the full token contents + # + groups_list = [ [] ] + + if 0 in self.sub_groups: + inner_level = 0 + + for i in range(0, len(tokens)): + tok = tokens[i] + + if tok.kind == CToken.BEGIN: + inner_level += 1 + + # + # Discard first begin + # + if not groups_list[0]: + continue + elif tok.kind == CToken.END: + inner_level -= 1 + if inner_level < 0: + break + + if inner_level: + groups_list[0].append(tok) + + if not self.max_group: + return level, groups_list + + delim = None + + # + # Ignore everything before BEGIN. The value of begin gives the + # delimiter to be used for the matches + # + for i in range(0, len(tokens)): + tok = tokens[i] + if tok.kind == CToken.BEGIN: + if tok.value == "{": + delim = ";" + elif tok.value == "(": + delim = "," + else: + self.log.error(fr"Can't handle \1..\n on {sub_str}") + + level = tok.level + break + + pos = 1 + groups_list.append([]) + + inner_level = 0 + for i in range(i + 1, len(tokens)): + tok = tokens[i] + + if tok.kind == CToken.BEGIN: + inner_level += 1 + if tok.kind == CToken.END: + inner_level -= 1 + if inner_level < 0: + break + + if tok.kind in [CToken.PUNC, CToken.ENDSTMT] and delim == tok.value: + pos += 1 + if self.greedy and pos > self.max_group: + pos -= 1 + else: + groups_list.append([]) + + if pos > self.max_group: + break + + continue + + groups_list[pos].append(tok) + + if pos < self.max_group: + log.error(fr"{self.sub_str} groups are up to {pos} instead of {self.max_group}") + + return level, groups_list + + def tokens(self, new_tokenizer): + level, groups = self.groups(new_tokenizer) + + new = CTokenizer() + + for tok in self.sub_tokeninzer.tokens: + if tok.kind == CToken.BACKREF: + group = int(tok.value[1:]) + + for group_tok in groups[group]: + new_tok = copy(group_tok) + + new_level = [0, 0, 0] + + for i in range(0, len(level)): + new_level[i] = new_tok.level[i] + level[i] + + 
new_tok.level = tuple(new_level) + + new.tokens += [ new_tok ] + else: + new.tokens += [ tok ] + + return new.tokens + + +class CMatch: + """ + Finding nested delimiters is hard with regular expressions. It is + even harder on Python with its normal re module, as there are several + advanced regular expressions that are missing. + + This is the case of this pattern:: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parentheses of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + + def __init__(self, regex, delim="("): + self.regex = KernRe("^" + regex + r"\b") + self.start_delim = delim + + def _search(self, tokenizer): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and places them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. + + The algorithm should work fine for properly paired lines, but will + silently ignore end delimiters that precede a start delimiter. + This should be OK for kernel-doc parser, as unaligned delimiters + would cause compilation errors. So, we don't need to raise exceptions + to cover such issues. 
+ """ + + start = None + started = False + + import sys + + stack = [] + + for i, tok in enumerate(tokenizer.tokens): + if start is None: + if tok.kind == CToken.NAME and self.regex.match(tok.value): + start = i + stack.append((start, tok.level)) + started = False + + continue + + if not started: + if tok.kind == CToken.SPACE: + continue + + if tok.kind == CToken.BEGIN and tok.value == self.start_delim: + started = True + continue + + # Name only token without BEGIN/END + if i > start: + i -= 1 + yield start, i + start = None + + if tok.kind == CToken.END and tok.level == stack[-1][1]: + start, level = stack.pop() + + yield start, i + start = None + + # + # If an END zeroing levels is not there, return remaining stuff + # This is meant to solve cases where the caller logic might be + # picking an incomplete block. + # + if start and stack: + if started: + s = str(tokenizer) + log.warning(f"can't find a final end at {s}") + + yield start, len(tokenizer.tokens) + + def search(self, source): + """ + This is similar to re.search: + + It matches a regex that it is followed by a delimiter, + returning occurrences only if all delimiters are paired. + """ + + if isinstance(source, CTokenizer): + tokenizer = source + is_token = True + else: + tokenizer = CTokenizer(source) + is_token = False + + for start, end in self._search(tokenizer): + new_tokenizer = CTokenizer(tokenizer.tokens[start:end + 1]) + + if is_token: + yield new_tokenizer + else: + yield str(new_tokenizer) + + def sub(self, sub_str, source, count=0): + """ + This is similar to re.sub: + + It matches a regex that it is followed by a delimiter, + replacing occurrences only if all delimiters are paired. + + if the sub argument contains:: + + r'\0' + + it will work just like re: it places there the matched paired data + with the delimiter stripped. + + If count is different than zero, it will replace at most count + items. 
+ """ + if isinstance(source, CTokenizer): + is_token = True + tokenizer = source + else: + is_token = False + tokenizer = CTokenizer(source) + + # Detect if sub_str contains sub arguments + + args_match = CTokenArgs(sub_str) + + new_tokenizer = CTokenizer() + pos = 0 + n = 0 + + # + # NOTE: the code below doesn't consider overlays at sub. + # We may need to add some extra unit tests to check if those + # would cause problems. When replacing by "", this should not + # be a problem, but other transformations could be problematic + # + for start, end in self._search(tokenizer): + new_tokenizer.tokens += tokenizer.tokens[pos:start] + + new = CTokenizer(tokenizer.tokens[start:end + 1]) + + new_tokenizer.tokens += args_match.tokens(new) + + pos = end + 1 + + n += 1 + if count and n >= count: + break + + new_tokenizer.tokens += tokenizer.tokens[pos:] + + if not is_token: + return str(new_tokenizer) + + return new_tokenizer + + def __repr__(self): + """ + Returns a displayable version of the class init. + """ + + return f'CMatch("{self.regex.regex.pattern}")' diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 022487ea2cc6..ed82b6e6ab25 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -9,13 +9,14 @@ Classes for navigating through the files that kernel-doc needs to handle to generate documentation. """ -import argparse import logging import os import re from kdoc.kdoc_parser import KernelDoc +from kdoc.xforms_lists import CTransforms from kdoc.kdoc_output import OutputFormat +from kdoc.kdoc_yaml_file import KDocTestFile class GlobSourceFiles: @@ -86,11 +87,81 @@ class GlobSourceFiles: file_not_found_cb(fname) +class KdocConfig(): + """ + Stores all configuration attributes that kdoc_parser and kdoc_output + needs. 
+ """ + def __init__(self, verbose=False, werror=False, wreturn=False, + wshort_desc=False, wcontents_before_sections=False, + logger=None): + + self.verbose = verbose + self.werror = werror + self.wreturn = wreturn + self.wshort_desc = wshort_desc + self.wcontents_before_sections = wcontents_before_sections + + if logger: + self.log = logger + else: + self.log = logging.getLogger(__file__) + + self.warning = self.log.warning + class KernelFiles(): """ Parse kernel-doc tags on multiple kernel source files. - There are two type of parsers defined here: + This is the main entry point to run kernel-doc. This class is initialized + using a series of optional arguments: + + ``verbose`` + If True, enables kernel-doc verbosity. Default: False. + + ``out_style`` + Class to be used to format output. If None (default), + only report errors. + + ``xforms`` + Transforms to be applied to C prototypes and data structs. + If not specified, defaults to xforms = CFunction() + + ``werror`` + If True, treat warnings as errors, retuning an error code on warnings. + + Default: False. + + ``wreturn`` + If True, warns about the lack of a return markup on functions. + + Default: False. + ``wshort_desc`` + If True, warns if initial short description is missing. + + Default: False. + + ``wcontents_before_sections`` + If True, warn if there are contents before sections (deprecated). + This option is kept just for backward-compatibility, but it does + nothing, neither here nor at the original Perl script. + + Default: False. + + ``logger`` + Optional logger class instance. + + If not specified, defaults to use: ``logging.getLogger("kernel-doc")`` + + ``yaml_file`` + If defined, stores the output inside a YAML file. + + ``yaml_content`` + Defines what will be inside the YAML file. + + Note: + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and ``EXPORT_SYMBOL*`` macros; - self.process_export_file(): parses only ``EXPORT_SYMBOL*`` macros. 
@@ -117,7 +188,12 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname) + if self.test_file: + store_src = True + else: + store_src = False + + doc = KernelDoc(self.config, fname, self.xforms, store_src=store_src) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table @@ -153,16 +229,21 @@ class KernelFiles(): self.error(f"Cannot find file {fname}") - def __init__(self, verbose=False, out_style=None, + def __init__(self, verbose=False, out_style=None, xforms=None, werror=False, wreturn=False, wshort_desc=False, wcontents_before_sections=False, - logger=None): + yaml_file=None, yaml_content=None, logger=None): """ Initialize startup variables and parse all files. """ if not verbose: - verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) + try: + verbose = bool(int(os.environ.get("KBUILD_VERBOSE", 0))) + except ValueError: + # Handles an eventual case where verbosity is not a number + # like KBUILD_VERBOSE="" + verbose = False if out_style is None: out_style = OutputFormat() @@ -181,29 +262,36 @@ class KernelFiles(): if kdoc_werror: werror = kdoc_werror + if not logger: + logger = logging.getLogger("kernel-doc") + else: + logger = logger + # Some variables are global to the parser logic as a whole as they are # used to send control configuration to KernelDoc class. As such, # those variables are read-only inside the KernelDoc. 
- self.config = argparse.Namespace + self.config = KdocConfig(verbose, werror, wreturn, wshort_desc, + wcontents_before_sections, logger) - self.config.verbose = verbose - self.config.werror = werror - self.config.wreturn = wreturn - self.config.wshort_desc = wshort_desc - self.config.wcontents_before_sections = wcontents_before_sections + # Override log warning, as we want to count errors + self.config.warning = self.warning - if not logger: - self.config.log = logging.getLogger("kernel-doc") + if yaml_file: + self.test_file = KDocTestFile(self.config, yaml_file, yaml_content) else: - self.config.log = logger + self.test_file = None - self.config.warning = self.warning + if xforms: + self.xforms = xforms + else: + self.xforms = CTransforms() self.config.src_tree = os.environ.get("SRCTREE", None) # Initialize variables that are internal to KernelFiles self.out_style = out_style + self.out_style.set_config(self.config) self.errors = 0 self.results = {} @@ -246,8 +334,6 @@ class KernelFiles(): returning kernel-doc markups on each interaction. 
""" - self.out_style.set_config(self.config) - if not filenames: filenames = sorted(self.results.keys()) @@ -267,29 +353,28 @@ class KernelFiles(): for s in symbol: function_table.add(s) - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - - msg = "" if fname not in self.results: self.config.log.warning("No kernel-doc for file %s", fname) continue symbols = self.results[fname] - self.out_style.set_symbols(symbols) - for arg in symbols: - m = self.out_msg(fname, arg.name, arg) + if self.test_file: + self.test_file.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) - if m is None: - ln = arg.get("ln", 0) - dtype = arg.get('type', "") + self.test_file.output_symbols(fname, symbols) - self.config.log.warning("%s:%d Can't handle %s", - fname, ln, dtype) - else: - msg += m + continue + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + msg = self.out_style.output_symbols(fname, symbols) if msg: yield fname, msg + + if self.test_file: + self.test_file.write() diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py index 2b8a93f79716..a7aa6e1e4c1c 100644 --- a/tools/lib/python/kdoc/kdoc_item.py +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -14,7 +14,8 @@ class KdocItem: then pass into the output modules. """ - def __init__(self, name, fname, type, start_line, **other_stuff): + def __init__(self, name, fname, type, start_line, + **other_stuff): self.name = name self.fname = fname self.type = type @@ -22,15 +23,34 @@ class KdocItem: self.sections = {} self.sections_start_lines = {} self.parameterlist = [] - self.parameterdesc_start_lines = [] + self.parameterdesc_start_lines = {} self.parameterdescs = {} self.parametertypes = {} + + self.warnings = [] + # # Just save everything else into our own dict so that the output # side can grab it directly as before. 
As we move things into more # structured data, this will, hopefully, fade away. # - self.other_stuff = other_stuff + known_keys = { + 'declaration_start_line', + 'sections', + 'sections_start_lines', + 'parameterlist', + 'parameterdesc_start_lines', + 'parameterdescs', + 'parametertypes', + 'warnings', + } + + self.other_stuff = {} + for k, v in other_stuff.items(): + if k in known_keys: + setattr(self, k, v) # real attribute + else: + self.other_stuff[k] = v def get(self, key, default = None): """ @@ -41,6 +61,23 @@ class KdocItem: def __getitem__(self, key): return self.get(key) + def __repr__(self): + return f"KdocItem({self.name}, {self.fname}, {self.type}, {self.declaration_start_line})" + + @classmethod + def from_dict(cls, d): + """Create a KdocItem from a plain dict.""" + + cp = d.copy() + name = cp.pop('name', None) + fname = cp.pop('fname', None) + type = cp.pop('type', None) + start_line = cp.pop('start_line', 1) + other_stuff = cp.pop('other_stuff', {}) + + # Everything that’s left goes straight to __init__ + return cls(name, fname, type, start_line, **cp, **other_stuff) + # # Tracking of section and parameter information. # @@ -49,7 +86,7 @@ class KdocItem: Set sections and start lines. """ self.sections = sections - self.section_start_lines = start_lines + self.sections_start_lines = start_lines def set_params(self, names, descs, types, starts): """ diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 4210b91dde5f..de107ab4a281 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -222,6 +222,27 @@ class OutputFormat: return None + def output_symbols(self, fname, symbols): + """ + Handles a set of KdocItem symbols. 
+ """ + self.set_symbols(symbols) + + msg = "" + for arg in symbols: + m = self.msg(fname, arg.name, arg) + + if m is None: + ln = arg.get("ln", 0) + dtype = arg.get('type', "") + + self.config.log.warning("%s:%d Can't handle %s", + fname, ln, dtype) + else: + msg += m + + return msg + # Virtual methods to be overridden by inherited classes # At the base class, those do nothing. def set_symbols(self, symbols): @@ -368,7 +389,7 @@ class RestFormat(OutputFormat): else: self.data += f'{self.lineprefix}**{section}**\n\n' - self.print_lineno(args.section_start_lines.get(section, 0)) + self.print_lineno(args.sections_start_lines.get(section, 0)) self.output_highlight(text) self.data += "\n" self.data += "\n" @@ -492,7 +513,9 @@ class RestFormat(OutputFormat): def out_var(self, fname, name, args): oldprefix = self.lineprefix ln = args.declaration_start_line - full_proto = args.other_stuff["full_proto"] + full_proto = args.other_stuff.get("full_proto") + if not full_proto: + raise KeyError(f"Can't find full proto for {name} variable") self.lineprefix = " " @@ -580,7 +603,35 @@ class RestFormat(OutputFormat): class ManFormat(OutputFormat): - """Consts and functions used by man pages output.""" + """ + Consts and functions used by man pages output. + + This class has one mandatory parameter and some optional ones, which + are needed to define the title header contents: + + ``modulename`` + Defines the module name to be used at the troff ``.TH`` output. + + This argument is optional. If not specified, it will be filled + with the directory which contains the documented file. + + ``section`` + Usually a numeric value from 0 to 9, but man pages also accept + some strings like "p". + + Defauls to ``9`` + + ``manual`` + Defaults to ``Kernel API Manual``. 
+ + The above controls the output of teh corresponding fields on troff + title headers, which will be filled like this:: + + .TH "{name}" {section} "{date}" "{modulename}" "{manual}" + + where ``name``` will match the API symbol name, and ``date`` will be + either the date where the Kernel was compiled or the current date + """ highlights = ( (type_constant, r"\1"), @@ -607,7 +658,21 @@ class ManFormat(OutputFormat): "%m %d %Y", ] - def __init__(self, modulename): + def modulename(self, args): + if self._modulename: + return self._modulename + + return os.path.dirname(args.fname) + + def emit_th(self, name, args): + """Emit a title header line.""" + title = name.strip() + module = self.modulename(args) + + self.data += f'.TH "{title}" {self.section} "{self.date}" ' + self.data += f'"{module}" "{self.manual}"\n' + + def __init__(self, modulename=None, section="9", manual="Kernel API Manual"): """ Creates class variables. @@ -616,7 +681,11 @@ class ManFormat(OutputFormat): """ super().__init__() - self.modulename = modulename + + self._modulename = modulename + self.section = section + self.manual = manual + self.symbols = [] dt = None @@ -632,7 +701,7 @@ class ManFormat(OutputFormat): if not dt: dt = datetime.now() - self.man_date = dt.strftime("%B %Y") + self.date = dt.strftime("%B %Y") def arg_name(self, args, name): """ @@ -647,7 +716,8 @@ class ManFormat(OutputFormat): dtype = args.type if dtype == "doc": - return self.modulename + return name +# return os.path.basename(self.modulename(args)) if dtype in ["function", "typedef"]: return name @@ -697,6 +767,185 @@ class ManFormat(OutputFormat): return self.data + def emit_table(self, colspec_row, rows): + + if not rows: + return "" + + out = "" + colspec = "\t".join(["l"] * len(rows[0])) + + out += "\n.TS\n" + out += "box;\n" + out += f"{colspec}.\n" + + if colspec_row: + out_row = [] + + for text in colspec_row: + out_row.append(f"\\fB{text}\\fP") + + out += "\t".join(out_row) + "\n_\n" + + for r in rows: + out 
+= "\t".join(r) + "\n" + + out += ".TE\n" + + return out + + def grid_table(self, lines, start): + """ + Ancillary function to help handling a grid table inside the text. + """ + + i = start + 1 + rows = [] + colspec_row = None + + while i < len(lines): + line = lines[i] + + if KernRe(r"^\s*\|.*\|\s*$").match(line): + parts = [] + + for p in line.strip('|').split('|'): + parts.append(p.strip()) + + rows.append(parts) + + elif KernRe(r'^\+\=[\+\=]+\+\s*$').match(line): + if rows and rows[0]: + if not colspec_row: + colspec_row = [""] * len(rows[0]) + + for j in range(0, len(rows[0])): + content = [] + for row in rows: + content.append(row[j]) + + colspec_row[j] = " ".join(content) + + rows = [] + + elif KernRe(r"^\s*\+[-+]+\+.*$").match(line): + pass + + else: + break + + i += 1 + + return i, self.emit_table(colspec_row, rows) + + def simple_table(self, lines, start): + """ + Ancillary function to help handling a simple table inside the text. + """ + + i = start + rows = [] + colspec_row = None + + pos = [] + for m in KernRe(r'\=+').finditer(lines[i]): + pos.append((m.start(), m.end() - 1)) + + i += 1 + while i < len(lines): + line = lines[i] + + if KernRe(r"^\s*[\=]+[ \t\=]+$").match(line): + i += 1 + break + + elif KernRe(r'^[\s=]+$').match(line): + if rows and rows[0]: + if not colspec_row: + colspec_row = [""] * len(rows[0]) + + for j in range(0, len(rows[0])): + content = [] + for row in rows: + content.append(row[j]) + + colspec_row[j] = " ".join(content) + + rows = [] + + else: + row = [""] * len(pos) + + for j in range(0, len(pos)): + start, end = pos[j] + + row[j] = line[start:end].strip() + + rows.append(row) + + i += 1 + + return i, self.emit_table(colspec_row, rows) + + def code_block(self, lines, start): + """ + Ensure that code blocks won't be messed up at the output. + + By default, troff join lines at the same paragraph. Disable it, + on code blocks. 
+ """ + + line = lines[start] + + if "code-block" in line: + out = "\n.nf\n" + elif line.startswith("..") and line.endswith("::"): + # + # Handle note, warning, error, ... markups + # + line = line[2:-1].strip().upper() + out = f"\n.nf\n\\fB{line}\\fP\n" + elif line.endswith("::"): + out = line[:-1] + out += "\n.nf\n" + else: + # Just in case. Should never happen in practice + out = "\n.nf\n" + + i = start + 1 + ident = None + + while i < len(lines): + line = lines[i] + + m = KernRe(r"\S").match(line) + if not m: + out += line + "\n" + i += 1 + continue + + pos = m.start() + if not ident: + if pos > 0: + ident = pos + else: + out += "\n.fi\n" + if i > start + 1: + return i - 1, out + else: + # Just in case. Should never happen in practice + return i, out + + if pos >= ident: + out += line + "\n" + i += 1 + continue + + break + + out += "\n.fi\n" + return i, out + def output_highlight(self, block): """ Outputs a C symbol that may require being highlighted with @@ -708,15 +957,46 @@ class ManFormat(OutputFormat): if isinstance(contents, list): contents = "\n".join(contents) - for line in contents.strip("\n").split("\n"): - line = KernRe(r"^\s*").sub("", line) - if not line: - continue + lines = contents.strip("\n").split("\n") + i = 0 - if line[0] == ".": - self.data += "\\&" + line + "\n" + while i < len(lines): + org_line = lines[i] + + line = KernRe(r"^\s*").sub("", org_line) + + if line: + if KernRe(r"^\+\-[-+]+\+.*$").match(line): + i, text = self.grid_table(lines, i) + self.data += text + continue + + if KernRe(r"^\=+[ \t]\=[ \t\=]+$").match(line): + i, text = self.simple_table(lines, i) + self.data += text + continue + + if line.endswith("::") or KernRe(r"\.\.\s+code-block.*::").match(line): + i, text = self.code_block(lines, i) + self.data += text + continue + + if line[0] == ".": + self.data += "\\&" + line + "\n" + i += 1 + continue + + # + # Handle lists + # + line = KernRe(r'^[-*]\s+').sub(r'.IP \[bu]\n', line) + line = 
KernRe(r'^(\d+|a-z)[\.\)]\s+').sub(r'.IP \1\n', line) else: - self.data += line + "\n" + line = ".PP\n" + + i += 1 + + self.data += line + "\n" def out_doc(self, fname, name, args): if not self.check_doc(name, args): @@ -724,7 +1004,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) for section, text in args.sections.items(): self.data += f'.SH "{section}"' + "\n" @@ -734,7 +1014,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -780,7 +1060,7 @@ class ManFormat(OutputFormat): def out_enum(self, fname, name, args): out_name = self.arg_name(args, name) - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"enum {name} \\- {args['purpose']}\n" @@ -813,7 +1093,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) full_proto = args.other_stuff["full_proto"] - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -830,11 +1110,11 @@ class ManFormat(OutputFormat): self.output_highlight(text) def out_typedef(self, fname, name, args): - module = self.modulename + module = self.modulename(args) purpose = args.get('purpose') out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"typedef {name} \\- {purpose}\n" @@ -844,12 +1124,12 @@ class ManFormat(OutputFormat): 
self.output_highlight(text) def out_struct(self, fname, name, args): - module = self.modulename + module = self.modulename(args) purpose = args.get('purpose') definition = args.get('definition') out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{args.type} {name} \\- {purpose}\n" diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index ca00695b47b3..c3f966da533e 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,7 +13,8 @@ import sys import re from pprint import pformat -from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.c_lex import CTokenizer, tokenizer_set_log +from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem # @@ -70,140 +71,9 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' cache = False) # -# Here begins a long set of transformations to turn structure member prefixes -# and macro invocations into something we can parse and generate kdoc for. -# -struct_args_pattern = r'([^,)]+)' - -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*__private', re.S), ' '), - (KernRe(r'\s*__rcu', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) 
- # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parentheses will be properly matched. 
- # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), -] -# -# Regexes here are guaranteed to have the end delimiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. 
-# -struct_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), -] - -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__exit +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__always_unused *"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), -] - -# # Ancillary functions # -def apply_transforms(xforms, text): - """ - Apply a set of transforms to a block of text. - """ - for search, subst in xforms: - text = search.sub(subst, text) - return text - multi_space = KernRe(r'\s\s+') def trim_whitespace(s): """ @@ -215,15 +85,9 @@ def trim_private_members(text): """ Remove ``struct``/``enum`` members that have been marked "private". """ - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. - # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. 
- # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + + tokens = CTokenizer(text) + return str(tokens) class state: """ @@ -276,7 +140,7 @@ class KernelEntry: self.parametertypes = {} self.parameterdesc_start_lines = {} - self.section_start_lines = {} + self.sections_start_lines = {} self.sections = {} self.anon_struct_union = False @@ -356,7 +220,7 @@ class KernelEntry: self.sections[name] += '\n' + contents else: self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line + self.sections_start_lines[name] = self.new_start_line self.new_start_line = 0 # self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) @@ -382,11 +246,15 @@ class KernelDoc: #: String to write when a parameter is not described. undescribed = "-- undescribed --" - def __init__(self, config, fname): + def __init__(self, config, fname, xforms, store_src=False): """Initialize internal variables""" self.fname = fname self.config = config + self.xforms = xforms + self.store_src = store_src + + tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") # Initial state for the state machines self.state = state.NORMAL @@ -449,7 +317,7 @@ class KernelDoc: for section in ["Description", "Return"]: if section in sections and not sections[section].rstrip(): del sections[section] - item.set_sections(sections, self.entry.section_start_lines) + item.set_sections(sections, self.entry.sections_start_lines) item.set_params(self.entry.parameterlist, self.entry.parameterdescs, self.entry.parametertypes, self.entry.parameterdesc_start_lines) @@ -571,6 +439,11 @@ class KernelDoc: # Ignore argument attributes arg = KernRe(r'\sPOS0?\s').sub(' ', arg) + # Replace '[at_least ' with '[static '. This allows sphinx to parse + # array parameter declarations like 'char A[at_least 4]', where + # 'at_least' is #defined to 'static' by the kernel headers. 
+ arg = arg.replace('[at_least ', '[static ') + # Strip leading/trailing spaces arg = arg.strip() arg = KernRe(r'\s+').sub(' ', arg, count=1) @@ -849,13 +722,15 @@ class KernelDoc: return declaration - def dump_struct(self, ln, proto): + def dump_struct(self, ln, proto, source): """ Store an entry for a ``struct`` or ``union`` """ # # Do the basic parse to get the pieces of the declaration. # + source = source + proto = trim_private_members(proto) struct_parts = self.split_struct_proto(proto) if not struct_parts: self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") @@ -869,12 +744,8 @@ class KernelDoc: # # Go through the list of members applying all of our transformations. # - members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) + members = self.xforms.apply("struct", members) - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) # # Deal with embedded struct and union members, and drop enums entirely. # @@ -888,10 +759,11 @@ class KernelDoc: declaration_name) self.check_sections(ln, declaration_name, decl_type) self.output_declaration(decl_type, declaration_name, + source=source, definition=self.format_struct_decl(declaration), purpose=self.entry.declaration_purpose) - def dump_enum(self, ln, proto): + def dump_enum(self, ln, proto, source): """ Store an ``enum`` inside self.entries array. """ @@ -899,6 +771,8 @@ class KernelDoc: # Strip preprocessor directives. Note that this depends on the # trailing semicolon we added in process_proto_type(). # + source = source + proto = trim_private_members(proto) proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) # # Parse out the name and members of the enum. Typedef form first. 
@@ -906,7 +780,7 @@ class KernelDoc: r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) - members = trim_private_members(r.group(1)) + members = r.group(1) # # Failing that, look for a straight enum # @@ -914,7 +788,7 @@ class KernelDoc: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) - members = trim_private_members(r.group(2)) + members = r.group(2) # # OK, this isn't going to work. # @@ -943,9 +817,10 @@ class KernelDoc: member_set = set() members = KernRe(r'\([^;)]*\)').sub('', members) for arg in members.split(','): - if not arg: - continue arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + if not arg.strip(): + continue + self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed @@ -961,29 +836,23 @@ class KernelDoc: f"Excess enum value '@{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, + source=source, purpose=self.entry.declaration_purpose) - def dump_var(self, ln, proto): + def dump_var(self, ln, proto, source): """ Store variables that are part of kAPI. """ VAR_ATTRIBS = [ "extern", + "const", ] - OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" 
- - sub_prefixes = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r"(?://.*)$"), ""), - (KernRe(r"(?:/\*.*\*/)"), ""), - (KernRe(r";$"), ""), - (KernRe(r"=.*"), ""), - ] + OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*" # # Store the full prototype before modifying it # + source = source full_proto = proto declaration_name = None @@ -1004,8 +873,7 @@ class KernelDoc: # Drop comments and macros to have a pure C prototype # if not declaration_name: - for r, sub in sub_prefixes: - proto = r.sub(sub, proto) + proto = self.xforms.apply("var", proto) proto = proto.rstrip() @@ -1015,17 +883,17 @@ class KernelDoc: default_val = None - r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): if not declaration_name: declaration_name = r.group(1) default_val = r.group(2) else: - r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") - if r.match(proto): - default_val = r.group(1) + r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") + if r.match(proto): + default_val = r.group(1) if not declaration_name: self.emit_msg(ln,f"{proto}: can't parse variable") return @@ -1034,39 +902,38 @@ class KernelDoc: default_val = default_val.lstrip("=").strip() self.output_declaration("var", declaration_name, + source=source, full_proto=full_proto, default_val=default_val, purpose=self.entry.declaration_purpose) - def dump_declaration(self, ln, prototype): + def dump_declaration(self, ln, prototype, source): """ Store a data declaration inside self.entries array. 
""" if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) + self.dump_enum(ln, prototype, source) elif self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) + self.dump_typedef(ln, prototype, source) elif self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) + self.dump_struct(ln, prototype, source) elif self.entry.decl_type == "var": - self.dump_var(ln, prototype) + self.dump_var(ln, prototype, source) else: # This would be a bug self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') - def dump_function(self, ln, prototype): + def dump_function(self, ln, prototype, source): """ Store a function or function macro inside self.entries array. """ + source = source found = func_macro = False return_type = '' decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) + # # If we have a macro, remove the "#define" at the front. # @@ -1085,6 +952,11 @@ class KernelDoc: declaration_name = r.group(1) func_macro = True found = True + else: + # + # Apply the initial transformations. + # + prototype = self.xforms.apply("func", prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) @@ -1150,13 +1022,14 @@ class KernelDoc: # Store the result. # self.output_declaration(decl_type, declaration_name, + source=source, typedef=('typedef' in return_type), functiontype=return_type, purpose=self.entry.declaration_purpose, func_macro=func_macro) - def dump_typedef(self, ln, proto): + def dump_typedef(self, ln, proto, source): """ Store a ``typedef`` inside self.entries array. 
""" @@ -1167,6 +1040,8 @@ class KernelDoc: typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' + source = source + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) @@ -1187,6 +1062,7 @@ class KernelDoc: self.create_parameter_list(ln, 'function', args, ',', declaration_name) self.output_declaration('function', declaration_name, + source=source, typedef=True, functiontype=return_type, purpose=self.entry.declaration_purpose) @@ -1204,6 +1080,7 @@ class KernelDoc: return self.output_declaration('typedef', declaration_name, + source=source, purpose=self.entry.declaration_purpose) return @@ -1241,7 +1118,7 @@ class KernelDoc: function_set.add(symbol) return True - def process_normal(self, ln, line): + def process_normal(self, ln, line, source): """ STATE_NORMAL: looking for the ``/**`` to begin everything. """ @@ -1255,7 +1132,7 @@ class KernelDoc: # next line is always the function name self.state = state.NAME - def process_name(self, ln, line): + def process_name(self, ln, line, source): """ STATE_NAME: Looking for the "name - description" line """ @@ -1388,7 +1265,7 @@ class KernelDoc: return False - def process_decl(self, ln, line): + def process_decl(self, ln, line, source): """ STATE_DECLARATION: We've seen the beginning of a declaration. """ @@ -1417,7 +1294,7 @@ class KernelDoc: self.emit_msg(ln, f"bad line: {line}") - def process_special(self, ln, line): + def process_special(self, ln, line, source): """ STATE_SPECIAL_SECTION: a section ending with a blank line. """ @@ -1468,7 +1345,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_body(self, ln, line): + def process_body(self, ln, line, source): """ STATE_BODY: the bulk of a kerneldoc comment. 
""" @@ -1482,7 +1359,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_inline_name(self, ln, line): + def process_inline_name(self, ln, line, source): """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" if doc_inline_sect.search(line): @@ -1495,9 +1372,15 @@ class KernelDoc: elif doc_content.search(line): self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") self.state = state.PROTO + + # + # Don't let it add partial comments at the code, as breaks the + # logic meant to remove comments from prototypes. + # + self.process_proto_type(ln, "/**\n" + line, source) # else ... ?? - def process_inline_text(self, ln, line): + def process_inline_text(self, ln, line, source): """STATE_INLINE_TEXT: docbook comments within a prototype.""" if doc_inline_end.search(line): @@ -1583,7 +1466,7 @@ class KernelDoc: return proto - def process_proto_function(self, ln, line): + def process_proto_function(self, ln, line, source): """Ancillary routine to process a function prototype.""" # strip C99-style comments to end of line @@ -1625,10 +1508,10 @@ class KernelDoc: # # ... and we're done # - self.dump_function(ln, self.entry.prototype) + self.dump_function(ln, self.entry.prototype, source) self.reset_state(ln) - def process_proto_type(self, ln, line): + def process_proto_type(self, ln, line, source): """ Ancillary routine to process a type. 
""" @@ -1658,7 +1541,7 @@ class KernelDoc: elif chunk == '}': self.entry.brcount -= 1 elif chunk == ';' and self.entry.brcount <= 0: - self.dump_declaration(ln, self.entry.prototype) + self.dump_declaration(ln, self.entry.prototype, source) self.reset_state(ln) return # @@ -1667,7 +1550,7 @@ class KernelDoc: # self.entry.prototype += ' ' - def process_proto(self, ln, line): + def process_proto(self, ln, line, source): """STATE_PROTO: reading a function/whatever prototype.""" if doc_inline_oneline.search(line): @@ -1679,17 +1562,18 @@ class KernelDoc: self.state = state.INLINE_NAME elif self.entry.decl_type == 'function': - self.process_proto_function(ln, line) + self.process_proto_function(ln, line, source) else: - self.process_proto_type(ln, line) + self.process_proto_type(ln, line, source) - def process_docblock(self, ln, line): + def process_docblock(self, ln, line, source): """STATE_DOCBLOCK: within a ``DOC:`` block.""" if doc_end.search(line): self.dump_section() - self.output_declaration("doc", self.entry.identifier) + self.output_declaration("doc", self.entry.identifier, + source=source) self.reset_state(ln) elif doc_content.search(line): @@ -1740,6 +1624,8 @@ class KernelDoc: prev = "" prev_ln = None export_table = set() + self.state = state.NORMAL + source = "" try: with open(self.fname, "r", encoding="utf8", @@ -1766,6 +1652,12 @@ class KernelDoc: ln, state.name[self.state], line) + if self.store_src: + if source and self.state == state.NORMAL: + source = "" + elif self.state != state.NORMAL: + source += line + "\n" + # This is an optimization over the original script. # There, when export_file was used for the same file, # it was read twice. 
Here, we use the already-existing @@ -1773,8 +1665,11 @@ class KernelDoc: # if (self.state != state.NORMAL) or \ not self.process_export(export_table, line): + prev_state = self.state # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) + self.state_actions[self.state](self, ln, line, source) + if prev_state == state.NORMAL and self.state != state.NORMAL: + source += line + "\n" self.emit_unused_warnings() diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 0bf9e01cdc57..28292efe25a2 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -52,7 +52,33 @@ class KernRe: return self.regex.pattern def __repr__(self): - return f're.compile("{self.regex.pattern}")' + """ + Returns a displayable version of the class init. + """ + + flag_map = { + re.IGNORECASE: "re.I", + re.MULTILINE: "re.M", + re.DOTALL: "re.S", + re.VERBOSE: "re.X", + } + + flags = [] + for flag, name in flag_map.items(): + if self.regex.flags & flag: + flags.append(name) + + flags_name = " | ".join(flags) + + max_len = 60 + pattern = "" + for pos in range(0, len(self.regex.pattern), max_len): + pattern += '"' + self.regex.pattern[pos:max_len + pos] + '" ' + + if flags_name: + return f'KernRe({pattern}, {flags_name})' + else: + return f'KernRe({pattern})' def __add__(self, other): """ @@ -78,6 +104,13 @@ class KernRe: self.last_match = self.regex.search(string) return self.last_match + def finditer(self, string): + """ + Alias to re.finditer. + """ + + return self.regex.finditer(string) + def findall(self, string): """ Alias to re.findall. @@ -106,173 +139,9 @@ class KernRe: return self.last_match.group(num) - -class NestedMatch: - """ - Finding nested delimiters is hard with regular expressions. It is - even harder on Python with its normal re module, as there are several - advanced regular expressions that are missing. 
- - This is the case of this pattern:: - - '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' - - which is used to properly match open/close parentheses of the - string search STRUCT_GROUP(), - - Add a class that counts pairs of delimiters, using it to match and - replace nested expressions. - - The original approach was suggested by: - - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - - Although I re-implemented it to make it more generic and match 3 types - of delimiters. The logic checks if delimiters are paired. If not, it - will ignore the search string. - """ - - # TODO: make NestedMatch handle multiple match groups - # - # Right now, regular expressions to match it are defined only up to - # the start delimiter, e.g.: - # - # \bSTRUCT_GROUP\( - # - # is similar to: STRUCT_GROUP\((.*)\) - # except that the content inside the match group is delimiter-aligned. - # - # The content inside parentheses is converted into a single replace - # group (e.g. r`\1'). - # - # It would be nice to change such definition to support multiple - # match groups, allowing a regex equivalent to: - # - # FOO\((.*), (.*), (.*)\) - # - # it is probably easier to define it not as a regular expression, but - # with some lexical definition like: - # - # FOO(arg1, arg2, arg3) - - DELIMITER_PAIRS = { - '{': '}', - '(': ')', - '[': ']', - } - - RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') - - def _search(self, regex, line): - """ - Finds paired blocks for a regex that ends with a delimiter. - - The suggestion of using finditer to match pairs came from: - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - but I ended using a different implementation to align all three types - of delimiters and seek for an initial regular expression. - - The algorithm seeks for open/close paired delimiters and places them - into a stack, yielding a start/stop position of each match when the - stack is zeroed. 
- - The algorithm should work fine for properly paired lines, but will - silently ignore end delimiters that precede a start delimiter. - This should be OK for kernel-doc parser, as unaligned delimiters - would cause compilation errors. So, we don't need to raise exceptions - to cover such issues. - """ - - stack = [] - - for match_re in regex.finditer(line): - start = match_re.start() - offset = match_re.end() - - d = line[offset - 1] - if d not in self.DELIMITER_PAIRS: - continue - - end = self.DELIMITER_PAIRS[d] - stack.append(end) - - for match in self.RE_DELIM.finditer(line[offset:]): - pos = match.start() + offset - - d = line[pos] - - if d in self.DELIMITER_PAIRS: - end = self.DELIMITER_PAIRS[d] - - stack.append(end) - continue - - # Does the end delimiter match what is expected? - if stack and d == stack[-1]: - stack.pop() - - if not stack: - yield start, offset, pos + 1 - break - - def search(self, regex, line): + def groups(self): """ - This is similar to re.search: - - It matches a regex that it is followed by a delimiter, - returning occurrences only if all delimiters are paired. + Returns the group results of the last match """ - for t in self._search(regex, line): - - yield line[t[0]:t[2]] - - def sub(self, regex, sub, line, count=0): - r""" - This is similar to re.sub: - - It matches a regex that it is followed by a delimiter, - replacing occurrences only if all delimiters are paired. - - if the sub argument contains:: - - r'\1' - - it will work just like re: it places there the matched paired data - with the delimiter stripped. - - If count is different than zero, it will replace at most count - items. 
- """ - out = "" - - cur_pos = 0 - n = 0 - - for start, end, pos in self._search(regex, line): - out += line[cur_pos:start] - - # Value, ignoring start/end delimiters - value = line[end:pos - 1] - - # replaces \1 at the sub string, if \1 is used there - new_sub = sub - new_sub = new_sub.replace(r'\1', value) - - out += new_sub - - # Drop end ';' if any - if line[pos] == ';': - pos += 1 - - cur_pos = pos - n += 1 - - if count and count >= n: - break - - # Append the remaining string - l = len(line) - out += line[cur_pos:l] - - return out + return self.last_match.groups() diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py new file mode 100644 index 000000000000..0be020d50df0 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_yaml_file.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>. + +import os + +from kdoc.kdoc_output import ManFormat, RestFormat + + +class KDocTestFile(): + """ + Handles the logic needed to store kernel‑doc output inside a YAML file. + Useful for unit tests and regression tests. + """ + + def __init__(self, config, yaml_file, yaml_content): + # + # Bail out early if yaml is not available + # + try: + import yaml + except ImportError: + sys.exit("Warning: yaml package not available. 
Aborting it.") + + self.config = config + self.test_file = os.path.expanduser(yaml_file) + self.yaml_content = yaml_content + self.test_names = set() + + self.tests = [] + + out_dir = os.path.dirname(self.test_file) + if out_dir and not os.path.isdir(out_dir): + sys.exit(f"Directory {out_dir} doesn't exist.") + + self.out_style = [] + + if "man" in self.yaml_content: + out_style = ManFormat() + out_style.set_config(self.config) + + self.out_style.append(out_style) + + if "rst" in self.yaml_content: + out_style = RestFormat() + out_style.set_config(self.config) + + self.out_style.append(out_style) + + def set_filter(self, export, internal, symbol, nosymbol, + function_table, enable_lineno, no_doc_sections): + """ + Set filters at the output classes. + """ + for out_style in self.out_style: + out_style.set_filter(export, internal, symbol, + nosymbol, function_table, + enable_lineno, no_doc_sections) + + @staticmethod + def get_kdoc_item(arg, start_line=1): + + d = vars(arg) + + declaration_start_line = d.get("declaration_start_line") + if not declaration_start_line: + return d + + d["declaration_start_line"] = start_line + + parameterdesc_start_lines = d.get("parameterdesc_start_lines") + if parameterdesc_start_lines: + for key in parameterdesc_start_lines: + ln = parameterdesc_start_lines[key] + ln += start_line - declaration_start_line + + parameterdesc_start_lines[key] = ln + + sections_start_lines = d.get("sections_start_lines") + if sections_start_lines: + for key in sections_start_lines: + ln = sections_start_lines[key] + ln += start_line - declaration_start_line + + sections_start_lines[key] = ln + + return d + + def output_symbols(self, fname, symbols): + """ + Store source, symbols and output strings at self.tests. 
+ """ + + # + # KdocItem needs to be converted into dicts + # + kdoc_item = [] + expected = [] + + # + # Source code didn't produce any symbol + # + if not symbols: + return + + expected_dict = {} + start_line=1 + + for arg in symbols: + source = arg.get("source", "") + + if arg and "KdocItem" in self.yaml_content: + msg = self.get_kdoc_item(arg) + + other_stuff = msg.get("other_stuff", {}) + if "source" in other_stuff: + del other_stuff["source"] + + expected_dict["kdoc_item"] = msg + + base_name = arg.name + if not base_name: + base_name = fname + base_name = base_name.lower().replace(".", "_").replace("/", "_") + + + # Don't add duplicated names + i = 0 + name = base_name + while name in self.test_names: + i += 1 + name = f"{base_name}_{i:03d}" + + self.test_names.add(name) + + for out_style in self.out_style: + if isinstance(out_style, ManFormat): + key = "man" + else: + key = "rst" + + expected_dict[key]= out_style.output_symbols(fname, [arg]).strip() + + test = { + "name": name, + "description": f"{fname} line {arg.declaration_start_line}", + "fname": fname, + "source": source, + "expected": [expected_dict] + } + + self.tests.append(test) + + expected_dict = {} + + def write(self): + """ + Output the content of self.tests to self.test_file. 
+ """ + import yaml + + # Helper function to better handle multilines + def str_presenter(dumper, data): + if "\n" in data: + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + # Register the representer + yaml.add_representer(str, str_presenter) + + data = {"tests": self.tests} + + with open(self.test_file, "w", encoding="utf-8") as fp: + yaml.dump(data, fp, + sort_keys=False, width=120, indent=2, + default_flow_style=False, allow_unicode=True, + explicit_start=False, explicit_end=False) diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py new file mode 100644 index 000000000000..f6ea9efb11ae --- /dev/null +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>. + +import re + +from kdoc.kdoc_re import KernRe +from kdoc.c_lex import CMatch, CTokenizer + +struct_args_pattern = r"([^,)]+)" + + +class CTransforms: + """ + Data class containing a long set of transformations to turn + structure member prefixes, and macro invocations and variables + into something we can parse and generate kdoc for. + """ + + # + # NOTE: + # Due to performance reasons, place CMatch rules before KernRe, + # as this avoids running the C parser every time. + # + + #: Transforms for structs and unions. 
+ struct_xforms = [ + (CMatch("__attribute__"), ""), + (CMatch("__aligned"), ""), + (CMatch("__counted_by"), ""), + (CMatch("__counted_by_(le|be)"), ""), + (CMatch("__guarded_by"), ""), + (CMatch("__pt_guarded_by"), ""), + (CMatch("__packed"), ""), + (CMatch("CRYPTO_MINALIGN_ATTR"), ""), + (CMatch("__private"), ""), + (CMatch("__rcu"), ""), + (CMatch("____cacheline_aligned_in_smp"), ""), + (CMatch("____cacheline_aligned"), ""), + (CMatch("__cacheline_group_(?:begin|end)"), ""), + (CMatch("__ETHTOOL_DECLARE_LINK_MODE_MASK"), r"DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)"), + (CMatch("DECLARE_PHY_INTERFACE_MASK",),r"DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)"), + (CMatch("DECLARE_BITMAP"), r"unsigned long \1[BITS_TO_LONGS(\2)]"), + (CMatch("DECLARE_HASHTABLE"), r"unsigned long \1[1 << ((\2) - 1)]"), + (CMatch("DECLARE_KFIFO"), r"\2 *\1"), + (CMatch("DECLARE_KFIFO_PTR"), r"\2 *\1"), + (CMatch("(?:__)?DECLARE_FLEX_ARRAY"), r"\1 \2[]"), + (CMatch("DEFINE_DMA_UNMAP_ADDR"), r"dma_addr_t \1"), + (CMatch("DEFINE_DMA_UNMAP_LEN"), r"__u32 \1"), + (CMatch("VIRTIO_DECLARE_FEATURES"), r"union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }"), + (CMatch("__cond_acquires"), ""), + (CMatch("__cond_releases"), ""), + (CMatch("__acquires"), ""), + (CMatch("__releases"), ""), + (CMatch("__must_hold"), ""), + (CMatch("__must_not_hold"), ""), + (CMatch("__must_hold_shared"), ""), + (CMatch("__cond_acquires_shared"), ""), + (CMatch("__acquires_shared"), ""), + (CMatch("__releases_shared"), ""), + (CMatch("__attribute__"), ""), + + # + # Macro __struct_group() creates an union with an anonymous + # and a non-anonymous struct, depending on the parameters. We only + # need one of those at kernel-doc, as we won't be documenting the same + # members twice. 
+ # + (CMatch("struct_group"), r"struct { \2+ };"), + (CMatch("struct_group_attr"), r"struct { \3+ };"), + (CMatch("struct_group_tagged"), r"struct { \3+ };"), + (CMatch("__struct_group"), r"struct { \4+ };"), + ] + + #: Transforms for function prototypes. + function_xforms = [ + (CMatch("static"), ""), + (CMatch("extern"), ""), + (CMatch("asmlinkage"), ""), + (CMatch("inline"), ""), + (CMatch("__inline__"), ""), + (CMatch("__inline"), ""), + (CMatch("__always_inline"), ""), + (CMatch("noinline"), ""), + (CMatch("__FORTIFY_INLINE"), ""), + (CMatch("__init"), ""), + (CMatch("__init_or_module"), ""), + (CMatch("__exit"), ""), + (CMatch("__deprecated"), ""), + (CMatch("__flatten"), ""), + (CMatch("__meminit"), ""), + (CMatch("__must_check"), ""), + (CMatch("__weak"), ""), + (CMatch("__sched"), ""), + (CMatch("__always_unused"), ""), + (CMatch("__printf"), ""), + (CMatch("__(?:re)?alloc_size"), ""), + (CMatch("__diagnose_as"), ""), + (CMatch("DECL_BUCKET_PARAMS"), r"\1, \2"), + (CMatch("__no_context_analysis"), ""), + (CMatch("__attribute_const__"), ""), + (CMatch("__attribute__"), ""), + + # + # HACK: this is similar to process_export() hack. It is meant to + # drop _noproof from function name. See for instance: + # ahash_request_alloc kernel-doc declaration at include/crypto/hash.h. + # + (KernRe("_noprof"), ""), + ] + + #: Transforms for variable prototypes. + var_xforms = [ + (CMatch("__read_mostly"), ""), + (CMatch("__ro_after_init"), ""), + (CMatch("__guarded_by"), ""), + (CMatch("__pt_guarded_by"), ""), + (CMatch("LIST_HEAD"), r"struct list_head \1"), + + (KernRe(r"(?://.*)$"), ""), + (KernRe(r"(?:/\*.*\*/)"), ""), + (KernRe(r";$"), ""), + ] + + #: Transforms main dictionary used at apply_transforms(). + xforms = { + "struct": struct_xforms, + "func": function_xforms, + "var": var_xforms, + } + + def apply(self, xforms_type, source): + """ + Apply a set of transforms to a block of source. 
+ + As tokenizer is used here, this function also remove comments + at the end. + """ + if xforms_type not in self.xforms: + return source + + if isinstance(source, str): + source = CTokenizer(source) + + for search, subst in self.xforms[xforms_type]: + # + # KernRe only accept strings. + # + if isinstance(search, KernRe): + source = str(source) + + source = search.sub(subst, source) + return str(source) diff --git a/tools/lib/python/unittest_helper.py b/tools/lib/python/unittest_helper.py new file mode 100755 index 000000000000..f3cba5120401 --- /dev/null +++ b/tools/lib/python/unittest_helper.py @@ -0,0 +1,363 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025-2026: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=C0103,R0912,R0914,E1101 + +""" +Provides helper functions and classes execute python unit tests. + +Those help functions provide a nice colored output summary of each +executed test and, when a test fails, it shows the different in diff +format when running in verbose mode, like:: + + $ tools/unittests/nested_match.py -v + ... + Traceback (most recent call last): + File "/new_devel/docs/tools/unittests/nested_match.py", line 69, in test_count_limit + self.assertEqual(replaced, "bar(a); bar(b); foo(c)") + ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + AssertionError: 'bar(a) foo(b); foo(c)' != 'bar(a); bar(b); foo(c)' + - bar(a) foo(b); foo(c) + ? ^^^^ + + bar(a); bar(b); foo(c) + ? ^^^^^ + ... + +It also allows filtering what tests will be executed via ``-k`` parameter. + +Typical usage is to do:: + + from unittest_helper import run_unittest + ... + + if __name__ == "__main__": + run_unittest(__file__) + +If passing arguments is needed, on a more complex scenario, it can be +used like on this example:: + + from unittest_helper import TestUnits, run_unittest + ... + env = {'sudo': ""} + ... 
+ if __name__ == "__main__": + runner = TestUnits() + base_parser = runner.parse_args() + base_parser.add_argument('--sudo', action='store_true', + help='Enable tests requiring sudo privileges') + + args = base_parser.parse_args() + + # Update module-level flag + if args.sudo: + env['sudo'] = "1" + + # Run tests with customized arguments + runner.run(__file__, parser=base_parser, args=args, env=env) +""" + +import argparse +import atexit +import os +import re +import unittest +import sys + +from unittest.mock import patch + + +class Summary(unittest.TestResult): + """ + Overrides ``unittest.TestResult`` class to provide a nice colored + summary. When in verbose mode, displays actual/expected difference in + unified diff format. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + #: Dictionary to store organized test results. + self.test_results = {} + + #: max length of the test names. + self.max_name_length = 0 + + def startTest(self, test): + super().startTest(test) + test_id = test.id() + parts = test_id.split(".") + + # Extract module, class, and method names + if len(parts) >= 3: + module_name = parts[-3] + else: + module_name = "" + if len(parts) >= 2: + class_name = parts[-2] + else: + class_name = "" + + method_name = parts[-1] + + # Build the hierarchical structure + if module_name not in self.test_results: + self.test_results[module_name] = {} + + if class_name not in self.test_results[module_name]: + self.test_results[module_name][class_name] = [] + + # Track maximum test name length for alignment + display_name = f"{method_name}:" + + self.max_name_length = max(len(display_name), self.max_name_length) + + def _record_test(self, test, status): + test_id = test.id() + parts = test_id.split(".") + if len(parts) >= 3: + module_name = parts[-3] + else: + module_name = "" + if len(parts) >= 2: + class_name = parts[-2] + else: + class_name = "" + method_name = parts[-1] + 
self.test_results[module_name][class_name].append((method_name, status)) + + def addSuccess(self, test): + super().addSuccess(test) + self._record_test(test, "OK") + + def addFailure(self, test, err): + super().addFailure(test, err) + self._record_test(test, "FAIL") + + def addError(self, test, err): + super().addError(test, err) + self._record_test(test, "ERROR") + + def addSkip(self, test, reason): + super().addSkip(test, reason) + self._record_test(test, f"SKIP ({reason})") + + def printResults(self, verbose): + """ + Print results using colors if tty. + """ + # Check for ANSI color support + use_color = sys.stdout.isatty() + COLORS = { + "OK": "\033[32m", # Green + "FAIL": "\033[31m", # Red + "SKIP": "\033[1;33m", # Yellow + "PARTIAL": "\033[33m", # Orange + "EXPECTED_FAIL": "\033[36m", # Cyan + "reset": "\033[0m", # Reset to default terminal color + } + if not use_color: + for c in COLORS: + COLORS[c] = "" + + # Calculate maximum test name length + if not self.test_results: + return + try: + lengths = [] + for module in self.test_results.values(): + for tests in module.values(): + for test_name, _ in tests: + lengths.append(len(test_name) + 1) # +1 for colon + max_length = max(lengths) + 2 # Additional padding + except ValueError: + sys.exit("Test list is empty") + + # Print results + for module_name, classes in self.test_results.items(): + if verbose: + print(f"{module_name}:") + for class_name, tests in classes.items(): + if verbose: + print(f" {class_name}:") + for test_name, status in tests: + if not verbose and status in [ "OK", "EXPECTED_FAIL" ]: + continue + + # Get base status without reason for SKIP + if status.startswith("SKIP"): + status_code = status.split()[0] + else: + status_code = status + color = COLORS.get(status_code, "") + print( + f" {test_name + ':':<{max_length}}{color}{status}{COLORS['reset']}" + ) + if verbose: + print() + + # Print summary + print(f"\nRan {self.testsRun} tests", end="") + if hasattr(self, "timeTaken"): + print(f" in 
{self.timeTaken:.3f}s", end="") + print() + + if not self.wasSuccessful(): + print(f"\n{COLORS['FAIL']}FAILED (", end="") + failures = getattr(self, "failures", []) + errors = getattr(self, "errors", []) + if failures: + print(f"failures={len(failures)}", end="") + if errors: + if failures: + print(", ", end="") + print(f"errors={len(errors)}", end="") + print(f"){COLORS['reset']}") + + +def flatten_suite(suite): + """Flatten test suite hierarchy.""" + tests = [] + for item in suite: + if isinstance(item, unittest.TestSuite): + tests.extend(flatten_suite(item)) + else: + tests.append(item) + return tests + + +class TestUnits: + """ + Helper class to set verbosity level. + + This class discover test files, import its unittest classes and + executes the test on it. + """ + def parse_args(self): + """Returns a parser for command line arguments.""" + parser = argparse.ArgumentParser(description="Test runner with regex filtering") + parser.add_argument("-v", "--verbose", action="count", default=1) + parser.add_argument("-q", "--quiet", action="store_true") + parser.add_argument("-f", "--failfast", action="store_true") + parser.add_argument("-k", "--keyword", + help="Regex pattern to filter test methods") + return parser + + def run(self, caller_file=None, pattern=None, + suite=None, parser=None, args=None, env=None): + """ + Execute all tests from the unity test file. + + It contains several optional parameters: + + ``caller_file``: + - name of the file that contains test. + + typical usage is to place __file__ at the caller test, e.g.:: + + if __name__ == "__main__": + TestUnits().run(__file__) + + ``pattern``: + - optional pattern to match multiple file names. Defaults + to basename of ``caller_file``. + + ``suite``: + - an unittest suite initialized by the caller using + ``unittest.TestLoader().discover()``. + + ``parser``: + - an argparse parser. If not defined, this helper will create + one. + + ``args``: + - an ``argparse.Namespace`` data filled by the caller. 
+ + ``env``: + - environment variables that will be passed to the test suite + + At least ``caller_file`` or ``suite`` must be used, otherwise a + ``TypeError`` will be raised. + """ + if not args: + if not parser: + parser = self.parse_args() + args = parser.parse_args() + + if not caller_file and not suite: + raise TypeError("Either caller_file or suite is needed at TestUnits") + + if args.quiet: + verbose = 0 + else: + verbose = args.verbose + + if not env: + env = os.environ.copy() + + env["VERBOSE"] = f"{verbose}" + + patcher = patch.dict(os.environ, env) + patcher.start() + # Ensure the os.environ patch is undone at interpreter exit + atexit.register(patcher.stop) + + + # NOTE(review): ``run`` is assigned on a temporary TextTestRunner + # instance that is not kept anywhere, so this looks like a no-op; + # confirm the intent. + if verbose >= 2: + unittest.TextTestRunner(verbosity=verbose).run = lambda suite: suite + + # Load ONLY tests from the calling file + if not suite: + # NOTE(review): ``pattern`` is defaulted here, but discover() below + # is given the basename of ``caller_file``, and ``pattern`` is later + # rebound to a compiled regex; a caller-supplied pattern appears to + # be unused. + if not pattern: + pattern = caller_file + + loader = unittest.TestLoader() + suite = loader.discover(start_dir=os.path.dirname(caller_file), + pattern=os.path.basename(caller_file)) + + # Flatten the suite for environment injection + tests_to_inject = flatten_suite(suite) + + # Filter tests by method name if -k specified + if args.keyword: + try: + pattern = re.compile(args.keyword) + filtered_suite = unittest.TestSuite() + for test in tests_to_inject: # Use the pre-flattened list + method_name = test.id().split(".")[-1] + if pattern.search(method_name): + filtered_suite.addTest(test) + suite = filtered_suite + except re.error as e: + sys.stderr.write(f"Invalid regex pattern: {e}\n") + sys.exit(1) + else: + # No keyword filtering: run every test from the flattened list + suite = unittest.TestSuite(tests_to_inject) + + if verbose >= 2: + resultclass = None + else: + resultclass = Summary + + # NOTE(review): verbosity is taken from args.verbose, not from the + # ``verbose`` value computed above, so --quiet does not reach the + # runner itself; confirm whether that is intended. + runner = unittest.TextTestRunner(verbosity=args.verbose, + resultclass=resultclass, + failfast=args.failfast) + result = runner.run(suite) + if resultclass: + result.printResults(verbose) + + sys.exit(not result.wasSuccessful()) + + +def run_unittest(fname): + """ + Basic usage of TestUnits 
class. + + Use it when there's no need to pass any extra argument to the tests. + It simply calls ``TestUnits().run(fname)``. The recommended way is to + place this at the end of each unittest module:: + + if __name__ == "__main__": + run_unittest(__file__) + """ + TestUnits().run(fname) |
