From d842057c4a205084fb3036122c7426963f04e826 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:55 +0100 Subject: docs: kdoc_parser: move transform lists to a separate file Over the time, most of the changes at kernel-doc are related to maintaining a list of transforms to convert macros into pure C code. Place such transforms on a separate module, to cleanup the parser module. There is an advantage on that: QEMU also uses our own kernel-doc, but the xforms list there is different. By placing it on a separate module, we can minimize the differences and make it easier to keep QEMU in sync with Kernel upstream. No functional changes. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/xforms_lists.py | 153 ++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 tools/lib/python/kdoc/xforms_lists.py (limited to 'tools/lib/python/kdoc/xforms_lists.py') diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py new file mode 100644 index 000000000000..e6e0302e5dd0 --- /dev/null +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab . + +import re + +from kdoc.kdoc_re import KernRe + +struct_args_pattern = r'([^,)]+)' + +class CTransforms: + """ + Data class containing a long set of transformations to turn + structure member prefixes, and macro invocations and variables + into something we can parse and generate kdoc for. + """ + + #: Transforms for structs and unions. + struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*__private', re.S), ' '), + (KernRe(r'\s*__rcu', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parentheses will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), + ] + + #: Transforms for function prototypes. + function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__exit +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__always_unused *"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), + ] + + #: Transforms for variable prototypes. + var_xforms = [ + (KernRe(r"__read_mostly"), ""), + (KernRe(r"__ro_after_init"), ""), + (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), + (KernRe(r"(?://.*)$"), ""), + (KernRe(r"(?:/\*.*\*/)"), ""), + (KernRe(r";$"), ""), + ] + + #: Transforms main dictionary used at apply_transforms(). + xforms = { + "struct": struct_xforms, + "func": function_xforms, + "var": var_xforms, + } + + def apply(self, xforms_type, text): + """ + Apply a set of transforms to a block of text. + """ + if xforms_type not in self.xforms: + return text + + for search, subst in self.xforms[xforms_type]: + text = search.sub(subst, text) + return text -- cgit v1.2.3 From 4ff59bdd93f0e80b5014977502d082c778f96304 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 2 Mar 2026 17:40:56 +0100 Subject: docs: xforms_lists: ignore context analysis and lock attributes Drop context analysis and lock (tracking) attributes to avoid kernel-doc warnings. There are now lots of warnings like these: Documentation/core-api/kref:328: ../include/linux/kref.h:72: WARNING: Invalid C declaration: Expected end of definition. [error at 96] int kref_put_mutex (struct kref *kref, void (*release)(struct kref *kref), struct mutex *mutex) __cond_acquires(true# mutex) ------------------------------------------------------------------------------------------------^ Documentation/core-api/kref:328: ../include/linux/kref.h:94: WARNING: Invalid C declaration: Expected end of definition. [error at 92] int kref_put_lock (struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) __cond_acquires(true# lock) --------------------------------------------------------------------------------------------^ The regex is suggested by Mauro; mine was too greedy. Thanks. Updated context analysis and lock macros list provided by PeterZ. Thanks. [mchehab: modified to be applied after xforms_lists split] Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20260107161548.45530e1c@canb.auug.org.au/ Signed-off-by: Randy Dunlap Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <3c7fdfc364a8920f92530b47bdbf4bb29a40371f.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 10 ++++++++++ tools/lib/python/kdoc/xforms_lists.py | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'tools/lib/python/kdoc/xforms_lists.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index d7daf658e9d2..503a18212747 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -75,6 +75,16 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' # is allowed. # struct_nested_prefixes = [ + (re.compile(r"__cond_acquires\s*\("), ""), + (re.compile(r"__cond_releases\s*\("), ""), + (re.compile(r"__acquires\s*\("), ""), + (re.compile(r"__releases\s*\("), ""), + (re.compile(r"__must_hold\s*\("), ""), + (re.compile(r"__must_not_hold\s*\("), ""), + (re.compile(r"__must_hold_shared\s*\("), ""), + (re.compile(r"__cond_acquires_shared\s*\("), ""), + (re.compile(r"__acquires_shared\s*\("), ""), + (re.compile(r"__releases_shared\s*\("), ""), (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), ] diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index e6e0302e5dd0..1bda7c4634c3 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -22,6 +22,8 @@ class CTransforms: (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), (KernRe(r'\s*__packed\s*', re.S), ' '), (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), (KernRe(r'\s*__private', re.S), ' '), @@ -120,6 +122,7 @@ class CTransforms: (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__no_context_analysis\s*"), ""), (KernRe(r"__attribute_const__ +"), ""), (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), ] @@ -128,6 +131,8 @@ class CTransforms: var_xforms = [ (KernRe(r"__read_mostly"), ""), (KernRe(r"__ro_after_init"), ""), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), -- cgit v1.2.3 From 85c2a51357f720fabfb6fa8d2551d87a94e797cb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:41:01 +0100 Subject: docs: kdoc_parser: move nested match transforms to xforms_lists.py As NestedMatch now has a sub method and a declaration close to what KernRe does, we can move the rules to xforms_lists and simplify kdoc_parser a little bit. No functional changes. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <762ce2a58ff024c1b0b6f6a6e05020d1415b8308.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 21 --------------------- tools/lib/python/kdoc/xforms_lists.py | 14 +++++++++++++- 2 files changed, 13 insertions(+), 22 deletions(-) (limited to 'tools/lib/python/kdoc/xforms_lists.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index cd9857906a2b..edf70ba139a5 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -69,25 +69,6 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' r'(?:[-:].*)?$', # description (not captured) cache = False) -# -# Regexes here are guaranteed to have the end delimiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. -# -struct_nested_prefixes = [ - (NestedMatch(r"__cond_acquires\s*\("), ""), - (NestedMatch(r"__cond_releases\s*\("), ""), - (NestedMatch(r"__acquires\s*\("), ""), - (NestedMatch(r"__releases\s*\("), ""), - (NestedMatch(r"__must_hold\s*\("), ""), - (NestedMatch(r"__must_not_hold\s*\("), ""), - (NestedMatch(r"__must_hold_shared\s*\("), ""), - (NestedMatch(r"__cond_acquires_shared\s*\("), ""), - (NestedMatch(r"__acquires_shared\s*\("), ""), - (NestedMatch(r"__releases_shared\s*\("), ""), - (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), -] - # # Ancillary functions # @@ -761,8 +742,6 @@ class KernelDoc: members = trim_private_members(members) members = self.xforms.apply("struct", members) - for search, sub in struct_nested_prefixes: - members = search.sub(search, sub, members) # # Deal with embedded struct and union members, and drop enums entirely. # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 1bda7c4634c3..c07cbe1e6349 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -4,7 +4,7 @@ import re -from kdoc.kdoc_re import KernRe +from kdoc.kdoc_re import KernRe, NestedMatch struct_args_pattern = r'([^,)]+)' @@ -94,6 +94,18 @@ class CTransforms: (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), + + (NestedMatch(r"__cond_acquires\s*\("), ""), + (NestedMatch(r"__cond_releases\s*\("), ""), + (NestedMatch(r"__acquires\s*\("), ""), + (NestedMatch(r"__releases\s*\("), ""), + (NestedMatch(r"__must_hold\s*\("), ""), + (NestedMatch(r"__must_not_hold\s*\("), ""), + (NestedMatch(r"__must_hold_shared\s*\("), ""), + (NestedMatch(r"__cond_acquires_shared\s*\("), ""), + (NestedMatch(r"__acquires_shared\s*\("), ""), + (NestedMatch(r"__releases_shared\s*\("), ""), + (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), ] #: Transforms for function prototypes. -- cgit v1.2.3 From 600079fdcf46fafe15b4ccd62804d66e05309cc6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:34 +0100 Subject: docs: kdoc: replace NestedMatch with CMatch Our previous approach to solve nested structs were to use NestedMatch. It works well, but adding support to parse delimiters is very complex. Instead, use CMatch, which uses a C tokenizer, making the code more reliable and simpler. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <900bff66f8093402999f9fe055fbfa3fa33a8d8b.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 2 +- tools/lib/python/kdoc/xforms_lists.py | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'tools/lib/python/kdoc/xforms_lists.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 62d8030cf532..efd58c88ff31 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -14,7 +14,7 @@ import re from pprint import pformat from kdoc.c_lex import CTokenizer -from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index c07cbe1e6349..7fa7f52cec7b 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -4,7 +4,8 @@ import re -from kdoc.kdoc_re import KernRe, NestedMatch +from kdoc.kdoc_re import KernRe +from kdoc.c_lex import CMatch struct_args_pattern = r'([^,)]+)' @@ -60,7 +61,7 @@ class CTransforms: # # As it doesn't properly match the end parenthesis on some cases. # - # So, a better solution was crafted: there's now a NestedMatch + # So, a better solution was crafted: there's now a CMatch # class that ensures that delimiters after a search are properly # matched. So, the implementation to drop STRUCT_GROUP() will be # handled in separate. @@ -72,9 +73,9 @@ class CTransforms: # # Replace macros # - # TODO: use NestedMatch for FOO($1, $2, ...) matches + # TODO: use CMatch for FOO($1, $2, ...) matches # - # it is better to also move those to the NestedMatch logic, + # it is better to also move those to the CMatch logic, # to ensure that parentheses will be properly matched. # (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), @@ -95,17 +96,17 @@ class CTransforms: (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), - (NestedMatch(r"__cond_acquires\s*\("), ""), - (NestedMatch(r"__cond_releases\s*\("), ""), - (NestedMatch(r"__acquires\s*\("), ""), - (NestedMatch(r"__releases\s*\("), ""), - (NestedMatch(r"__must_hold\s*\("), ""), - (NestedMatch(r"__must_not_hold\s*\("), ""), - (NestedMatch(r"__must_hold_shared\s*\("), ""), - (NestedMatch(r"__cond_acquires_shared\s*\("), ""), - (NestedMatch(r"__acquires_shared\s*\("), ""), - (NestedMatch(r"__releases_shared\s*\("), ""), - (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), + (CMatch(r"__cond_acquires"), ""), + (CMatch(r"__cond_releases"), ""), + (CMatch(r"__acquires"), ""), + (CMatch(r"__releases"), ""), + (CMatch(r"__must_hold"), ""), + (CMatch(r"__must_not_hold"), ""), + (CMatch(r"__must_hold_shared"), ""), + (CMatch(r"__cond_acquires_shared"), ""), + (CMatch(r"__acquires_shared"), ""), + (CMatch(r"__releases_shared"), ""), + (CMatch(r"STRUCT_GROUP"), r'\0'), ] #: Transforms for function prototypes. -- cgit v1.2.3 From f63e6163c7e4f988b2ff35721ffc86b95425293f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:36 +0100 Subject: docs: xforms_lists: handle struct_group directly The previous logic was handling struct_group on two steps. Remove the previous approach, as CMatch can do it the right way on a single step. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/xforms_lists.py | 53 ++++------------------------------- 1 file changed, 6 insertions(+), 47 deletions(-) (limited to 'tools/lib/python/kdoc/xforms_lists.py') diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 7fa7f52cec7b..98632c50a146 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -32,52 +32,6 @@ class CTransforms: (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), (KernRe(r'\s*____cacheline_aligned', re.S), ' '), (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a CMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use CMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the CMatch logic, - # to ensure that parentheses will be properly matched. - # (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), @@ -106,7 +60,12 @@ class CTransforms: (CMatch(r"__cond_acquires_shared"), ""), (CMatch(r"__acquires_shared"), ""), (CMatch(r"__releases_shared"), ""), - (CMatch(r"STRUCT_GROUP"), r'\0'), + + (CMatch('struct_group'), r'\2'), + (CMatch('struct_group_attr'), r'\3'), + (CMatch('struct_group_tagged'), r'struct \1 \2; \3'), + (CMatch('__struct_group'), r'\4'), + ] #: Transforms for function prototypes. -- cgit v1.2.3 From 2f07ddbd5793df4ec24f727322cc68065feb3568 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:37 +0100 Subject: docs: xforms_lists: better evaluate struct_group macros The previous approach were to unwind nested structs/unions. Now that we have a logic that can handle it well, use it to ensure that struct_group macros will properly reflect the actual struct. Note that the replacemend logic still simplifies the code a little bit, as the basic build block for struct group is: union { \ struct { MEMBERS } ATTRS; \ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ } ATTRS There: - ATTRS is meant to add extra macro attributes like __packed which we already discard, as they aren't relevant to document struct members; - TAG is used only when built with __cplusplus. So, instead, convert them into just: struct { MEMBERS }; Please notice that here, we're using the greedy version of the backrefs, as MEMBERS is actually MEMBERS... on all such macros. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <24bf2c036b08814d9b4aabc27542fd3b2ff54424.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/xforms_lists.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'tools/lib/python/kdoc/xforms_lists.py') diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 98632c50a146..2056572852fd 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -61,10 +61,16 @@ class CTransforms: (CMatch(r"__acquires_shared"), ""), (CMatch(r"__releases_shared"), ""), - (CMatch('struct_group'), r'\2'), - (CMatch('struct_group_attr'), r'\3'), - (CMatch('struct_group_tagged'), r'struct \1 \2; \3'), - (CMatch('__struct_group'), r'\4'), + # + # Macro __struct_group() creates an union with an anonymous + # and a non-anonymous struct, depending on the parameters. We only + # need one of those at kernel-doc, as we won't be documenting the same + # members twice. + # + (CMatch('struct_group'), r'struct { \2+ };'), + (CMatch('struct_group_attr'), r'struct { \3+ };'), + (CMatch('struct_group_tagged'), r'struct { \3+ };'), + (CMatch('__struct_group'), r'struct { \4+ };'), ] -- cgit v1.2.3 From 79d881beb721d27f679f0dc1cba2d5fe2d7f6d8d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:41 +0100 Subject: docs: kdoc_parser: avoid tokenizing structs everytime Most of the rules inside CTransforms are of the type CMatch. Don't re-parse the source code every time. Doing this doesn't change the output, but makes kdoc almost as fast as before the tokenizer patches: # Before tokenizer patches $ time ./scripts/kernel-doc . -man >original 2>&1 real 0m42.933s user 0m36.523s sys 0m1.145s # After tokenizer patches $ time ./scripts/kernel-doc . -man >before 2>&1 real 1m29.853s user 1m23.974s sys 0m1.237s # After this patch $ time ./scripts/kernel-doc . -man >after 2>&1 real 0m48.579s user 0m45.938s sys 0m0.988s $ diff -s before after Files before and after are identical Manually checked the differences between original and after with: $ diff -U0 -prBw original after|grep -v Warning|grep -v "@@"|less They're due: - whitespace fixes; - struct_group are now better handled; - several badly-generated man pages from broken inline kernel-doc markups are now fixed. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <1cc2a4286ebf7d4b2d03fcaf42a1ba9fa09004b9.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 1 - tools/lib/python/kdoc/xforms_lists.py | 30 ++++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'tools/lib/python/kdoc/xforms_lists.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 8b2c9d0f0c58..f6c4ee3b18c9 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -737,7 +737,6 @@ class KernelDoc: # # Go through the list of members applying all of our transformations. # - members = trim_private_members(members) members = self.xforms.apply("struct", members) # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 2056572852fd..5a62d4a450cb 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -5,7 +5,7 @@ import re from kdoc.kdoc_re import KernRe -from kdoc.c_lex import CMatch +from kdoc.c_lex import CMatch, CTokenizer struct_args_pattern = r'([^,)]+)' @@ -16,6 +16,12 @@ class CTransforms: into something we can parse and generate kdoc for. """ + # + # NOTE: + # Due to performance reasons, place CMatch rules before KernRe, + # as this avoids running the C parser every time. + # + #: Transforms for structs and unions. struct_xforms = [ # Strip attributes @@ -124,13 +130,25 @@ class CTransforms: "var": var_xforms, } - def apply(self, xforms_type, text): + def apply(self, xforms_type, source): """ - Apply a set of transforms to a block of text. + Apply a set of transforms to a block of source. + + As tokenizer is used here, this function also remove comments + at the end. """ if xforms_type not in self.xforms: - return text + return source + + if isinstance(source, str): + source = CTokenizer(source) for search, subst in self.xforms[xforms_type]: - text = search.sub(subst, text) - return text + # + # KernRe only accept strings. + # + if isinstance(search, KernRe): + source = str(source) + + source = search.sub(subst, source) + return str(source) -- cgit v1.2.3 From 7538df7a2d7d26428803cf8053476169a6d28659 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:42 +0100 Subject: docs: xforms_lists: use CMatch for all identifiers CMatch is lexically correct and replaces only identifiers, which is exactly where macro transformations happen. Use it to make the output safer and ensure that all arguments will be parsed the right way, even on complex cases. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <86d4a07ff0e054207747fabf38d6bb261b52b5fa.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/xforms_lists.py | 159 +++++++++++++++++----------------- 1 file changed, 79 insertions(+), 80 deletions(-) (limited to 'tools/lib/python/kdoc/xforms_lists.py') diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 5a62d4a450cb..f6ea9efb11ae 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -7,7 +7,8 @@ import re from kdoc.kdoc_re import KernRe from kdoc.c_lex import CMatch, CTokenizer -struct_args_pattern = r'([^,)]+)' +struct_args_pattern = r"([^,)]+)" + class CTransforms: """ @@ -24,48 +25,40 @@ class CTransforms: #: Transforms for structs and unions. struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*__private', re.S), ' '), - (KernRe(r'\s*__rcu', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), - (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), - - (CMatch(r"__cond_acquires"), ""), - (CMatch(r"__cond_releases"), ""), - (CMatch(r"__acquires"), ""), - (CMatch(r"__releases"), ""), - (CMatch(r"__must_hold"), ""), - (CMatch(r"__must_not_hold"), ""), - (CMatch(r"__must_hold_shared"), ""), - (CMatch(r"__cond_acquires_shared"), ""), - (CMatch(r"__acquires_shared"), ""), - (CMatch(r"__releases_shared"), ""), + (CMatch("__attribute__"), ""), + (CMatch("__aligned"), ""), + (CMatch("__counted_by"), ""), + (CMatch("__counted_by_(le|be)"), ""), + (CMatch("__guarded_by"), ""), + (CMatch("__pt_guarded_by"), ""), + (CMatch("__packed"), ""), + (CMatch("CRYPTO_MINALIGN_ATTR"), ""), + (CMatch("__private"), ""), + (CMatch("__rcu"), ""), + (CMatch("____cacheline_aligned_in_smp"), ""), + (CMatch("____cacheline_aligned"), ""), + (CMatch("__cacheline_group_(?:begin|end)"), ""), + (CMatch("__ETHTOOL_DECLARE_LINK_MODE_MASK"), r"DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)"), + (CMatch("DECLARE_PHY_INTERFACE_MASK",),r"DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)"), + (CMatch("DECLARE_BITMAP"), r"unsigned long \1[BITS_TO_LONGS(\2)]"), + (CMatch("DECLARE_HASHTABLE"), r"unsigned long \1[1 << ((\2) - 1)]"), + (CMatch("DECLARE_KFIFO"), r"\2 *\1"), + (CMatch("DECLARE_KFIFO_PTR"), r"\2 *\1"), + (CMatch("(?:__)?DECLARE_FLEX_ARRAY"), r"\1 \2[]"), + (CMatch("DEFINE_DMA_UNMAP_ADDR"), r"dma_addr_t \1"), + (CMatch("DEFINE_DMA_UNMAP_LEN"), r"__u32 \1"), + (CMatch("VIRTIO_DECLARE_FEATURES"), r"union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }"), + (CMatch("__cond_acquires"), ""), + (CMatch("__cond_releases"), ""), + (CMatch("__acquires"), ""), + (CMatch("__releases"), ""), + (CMatch("__must_hold"), ""), + (CMatch("__must_not_hold"), ""), + (CMatch("__must_hold_shared"), ""), + (CMatch("__cond_acquires_shared"), ""), + (CMatch("__acquires_shared"), ""), + (CMatch("__releases_shared"), ""), + (CMatch("__attribute__"), ""), # # Macro __struct_group() creates an union with an anonymous @@ -73,51 +66,57 @@ class CTransforms: # need one of those at kernel-doc, as we won't be documenting the same # members twice. # - (CMatch('struct_group'), r'struct { \2+ };'), - (CMatch('struct_group_attr'), r'struct { \3+ };'), - (CMatch('struct_group_tagged'), r'struct { \3+ };'), - (CMatch('__struct_group'), r'struct { \4+ };'), - + (CMatch("struct_group"), r"struct { \2+ };"), + (CMatch("struct_group_attr"), r"struct { \3+ };"), + (CMatch("struct_group_tagged"), r"struct { \3+ };"), + (CMatch("__struct_group"), r"struct { \4+ };"), ] #: Transforms for function prototypes. function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__exit +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__always_unused *"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__no_context_analysis\s*"), ""), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), + (CMatch("static"), ""), + (CMatch("extern"), ""), + (CMatch("asmlinkage"), ""), + (CMatch("inline"), ""), + (CMatch("__inline__"), ""), + (CMatch("__inline"), ""), + (CMatch("__always_inline"), ""), + (CMatch("noinline"), ""), + (CMatch("__FORTIFY_INLINE"), ""), + (CMatch("__init"), ""), + (CMatch("__init_or_module"), ""), + (CMatch("__exit"), ""), + (CMatch("__deprecated"), ""), + (CMatch("__flatten"), ""), + (CMatch("__meminit"), ""), + (CMatch("__must_check"), ""), + (CMatch("__weak"), ""), + (CMatch("__sched"), ""), + (CMatch("__always_unused"), ""), + (CMatch("__printf"), ""), + (CMatch("__(?:re)?alloc_size"), ""), + (CMatch("__diagnose_as"), ""), + (CMatch("DECL_BUCKET_PARAMS"), r"\1, \2"), + (CMatch("__no_context_analysis"), ""), + (CMatch("__attribute_const__"), ""), + (CMatch("__attribute__"), ""), + + # + # HACK: this is similar to process_export() hack. It is meant to + # drop _noproof from function name. See for instance: + # ahash_request_alloc kernel-doc declaration at include/crypto/hash.h. + # + (KernRe("_noprof"), ""), ] #: Transforms for variable prototypes. var_xforms = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), - (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), + (CMatch("__read_mostly"), ""), + (CMatch("__ro_after_init"), ""), + (CMatch("__guarded_by"), ""), + (CMatch("__pt_guarded_by"), ""), + (CMatch("LIST_HEAD"), r"struct list_head \1"), + (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), -- cgit v1.2.3