From 77e6e17e9fc4cb4e59ad97de5453bb6f963a5fd4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:46 +0100 Subject: docs: kdoc_parser: move var transformers to the beginning Just like functions and structs had their transform variables placed at the beginning, move variable transforms to there as well. No functional changes. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <491b290252a308f381f88353a3bbe9e2bd1f6a62.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index ca00695b47b3..68a5aea9175d 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -192,6 +192,18 @@ function_xforms = [ (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), ] +# +# Transforms for variable prototypes +# +var_xforms = [ + (KernRe(r"__read_mostly"), ""), + (KernRe(r"__ro_after_init"), ""), + (KernRe(r"(?://.*)$"), ""), + (KernRe(r"(?:/\*.*\*/)"), ""), + (KernRe(r";$"), ""), + (KernRe(r"=.*"), ""), +] + # # Ancillary functions # @@ -972,15 +984,6 @@ class KernelDoc: ] OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" 
- sub_prefixes = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r"(?://.*)$"), ""), - (KernRe(r"(?:/\*.*\*/)"), ""), - (KernRe(r";$"), ""), - (KernRe(r"=.*"), ""), - ] - # # Store the full prototype before modifying it # @@ -1004,7 +1007,7 @@ class KernelDoc: # Drop comments and macros to have a pure C prototype # if not declaration_name: - for r, sub in sub_prefixes: + for r, sub in var_xforms: proto = r.sub(sub, proto) proto = proto.rstrip() -- cgit v1.2.3 From cca1bbdd72f72a3cf86d90fd6f326fd709ae931f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:47 +0100 Subject: docs: kdoc_parser: don't mangle with function defines Mangling with #defines is not nice, as we may end removing the macro names, preventing several macros from being properly documented. Also, on defines, we have something like: #define foo(a1, a2, a3, ...) \ /* some real implementation */ The prototype part (first line on this example) won't contain any macros, so no need to apply any regexes on it. With that, move the apply_transforms() logic to ensure that it will be called only on functions. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <8f9854c8ca1c794b6a3fe418f7adbc32aa68b432.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 68a5aea9175d..9643ffb7584a 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -163,7 +163,7 @@ struct_nested_prefixes = [ # # Transforms for function prototypes # -function_xforms = [ +function_xforms = [ (KernRe(r"^static +"), ""), (KernRe(r"^extern +"), ""), (KernRe(r"^asmlinkage +"), ""), @@ -1066,10 +1066,7 @@ class KernelDoc: found = func_macro = False return_type = '' decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) + # # If we have a macro, remove the "#define" at the front. # @@ -1088,6 +1085,11 @@ class KernelDoc: declaration_name = r.group(1) func_macro = True found = True + else: + # + # Apply the initial transformations. + # + prototype = apply_transforms(function_xforms, prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) -- cgit v1.2.3 From 4fd349f03dc51bc2f9cd2ea9f6309b0bc2b848ca Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:48 +0100 Subject: docs: kdoc_parser: fix variable regexes to work with size_t The regular expressions meant to pick variable types are too naive: they forgot that the type word may contain underlines. It also means that we need to change the regex which detects var attributes to handle "const". 
Co-developed-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <8230715239929cf9d475ab81ca1df7de65d82d06.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 9643ffb7584a..9c9443281c40 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -981,8 +981,9 @@ class KernelDoc: """ VAR_ATTRIBS = [ "extern", + "const", ] - OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" + OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*" # # Store the full prototype before modifying it @@ -1018,14 +1019,14 @@ class KernelDoc: default_val = None - r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): if not declaration_name: declaration_name = r.group(1) default_val = r.group(2) else: - r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): default_val = r.group(1) -- cgit v1.2.3 From 9bbf22b87d866fa1e6a1f9f6376d2ef458b6dcc7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:49 +0100 Subject: docs: kdoc_parser: fix the default_value logic for variables The indentation is wrong for the second regex, which causes problems on variables with defaults. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <681f18338abd6ae33cb9c15d72bb31a1cba75a9a.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 9c9443281c40..4bf55244870f 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1027,9 +1027,9 @@ class KernelDoc: default_val = r.group(2) else: r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") - if r.match(proto): - default_val = r.group(1) + if r.match(proto): + default_val = r.group(1) if not declaration_name: self.emit_msg(ln,f"{proto}: can't parse variable") return -- cgit v1.2.3 From b7dc635459ad5b00f2d482406dbdca3291622ce2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:50 +0100 Subject: docs: kdoc_parser: don't exclude defaults from prototype If we do that, the defaults won't be parsed. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_parser.py | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 4bf55244870f..39ff27d421eb 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -201,7 +201,6 @@ var_xforms = [ (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), - (KernRe(r"=.*"), ""), ] # -- cgit v1.2.3 From 6d9c2e9575b8630e17571a77eef8ade84a2a6344 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:51 +0100 Subject: docs: kdoc_parser: fix parser to support multi-word types The regular expression currently expects a single word for the type, but it may be something like "struct foo". Add support for it. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <544c73a9e670b6fef1828bf4f2ba0de7d29d8675.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 39ff27d421eb..22a820d33dc8 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1018,14 +1018,14 @@ class KernelDoc: default_val = None - r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): if not declaration_name: declaration_name = r.group(1) default_val = r.group(2) else: - r= KernRe(OPTIONAL_VAR_ATTR + 
r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): default_val = r.group(1) -- cgit v1.2.3 From 9bff5121fe22fdd0bb5bd6f744e136ec20bf7b95 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:52 +0100 Subject: docs: kdoc_parser: add support for LIST_HEAD Convert LIST_HEAD into struct list_head when handling its prototype. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <8bdfa6ba6002b0a73a83660f0ce7b40e30124552.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 22a820d33dc8..1df869061bf3 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -198,6 +198,7 @@ function_xforms = [ var_xforms = [ (KernRe(r"__read_mostly"), ""), (KernRe(r"__ro_after_init"), ""), + (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), -- cgit v1.2.3 From 97d4e70bc2c6f75911a9a5e1a75f2de13fde9b6b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 2 Mar 2026 17:40:53 +0100 Subject: docs: kdoc_parser: handle struct member macro VIRTIO_DECLARE_FEATURES(name) Parse the macro VIRTIO_DECLARE_FEATURES(name) and expand it to its definition. 
This prevents one build warning: WARNING: include/linux/virtio.h:188 struct member 'VIRTIO_DECLARE_FEATURES(features' not described in 'virtio_device' Signed-off-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <6f62e1f1210e74906fa50f4e937f66f54813661b.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 1df869061bf3..917e4528bfbf 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -150,6 +150,7 @@ struct_xforms = [ struct_args_pattern + r'\)', re.S), r'\1 \2[]'), (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), ] # # Regexes here are guaranteed to have the end delimiter matching -- cgit v1.2.3 From d842057c4a205084fb3036122c7426963f04e826 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:55 +0100 Subject: docs: kdoc_parser: move transform lists to a separate file Over time, most of the changes to kernel-doc are related to maintaining a list of transforms to convert macros into pure C code. Place such transforms in a separate module, to clean up the parser module. There is an advantage to that: QEMU also uses our own kernel-doc, but the xforms list there is different. By placing it in a separate module, we can minimize the differences and make it easier to keep QEMU in sync with Kernel upstream. No functional changes. 
Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: --- Documentation/tools/kdoc_parser.rst | 8 ++ tools/lib/python/kdoc/kdoc_files.py | 3 +- tools/lib/python/kdoc/kdoc_parser.py | 145 ++------------------------------ tools/lib/python/kdoc/xforms_lists.py | 153 ++++++++++++++++++++++++++++++++++ 4 files changed, 168 insertions(+), 141 deletions(-) create mode 100644 tools/lib/python/kdoc/xforms_lists.py (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/Documentation/tools/kdoc_parser.rst b/Documentation/tools/kdoc_parser.rst index 03ee54a1b1cc..55b202173195 100644 --- a/Documentation/tools/kdoc_parser.rst +++ b/Documentation/tools/kdoc_parser.rst @@ -4,6 +4,14 @@ Kernel-doc parser stage ======================= +C replacement rules used by the parser +====================================== + +.. automodule:: lib.python.kdoc.xforms_lists + :members: + :show-inheritance: + :undoc-members: + File handler classes ==================== diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 022487ea2cc6..33618c6abec2 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -15,6 +15,7 @@ import os import re from kdoc.kdoc_parser import KernelDoc +from kdoc.xforms_lists import CTransforms from kdoc.kdoc_output import OutputFormat @@ -117,7 +118,7 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname) + doc = KernelDoc(self.config, fname, CTransforms()) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 917e4528bfbf..d7daf658e9d2 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -69,89 +69,6 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' r'(?:[-:].*)?$', # description (not captured) cache = False) -# -# Here 
begins a long set of transformations to turn structure member prefixes -# and macro invocations into something we can parse and generate kdoc for. -# -struct_args_pattern = r'([^,)]+)' - -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*__private', re.S), ' '), - (KernRe(r'\s*__rcu', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. 
- # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parentheses will be properly matched. - # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), - (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), -] # # Regexes here are guaranteed to have the end delimiter matching # the start delimiter. 
Yet, right now, only one replace group @@ -161,62 +78,10 @@ struct_nested_prefixes = [ (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), ] -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__exit +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__always_unused *"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), -] - -# -# Transforms for variable prototypes -# -var_xforms = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), - (KernRe(r"(?://.*)$"), ""), - (KernRe(r"(?:/\*.*\*/)"), ""), - (KernRe(r";$"), ""), -] - # # Ancillary functions # -def apply_transforms(xforms, text): - """ - Apply a set of transforms to a block of text. - """ - for search, subst in xforms: - text = search.sub(subst, text) - return text - multi_space = KernRe(r'\s\s+') def trim_whitespace(s): """ @@ -395,11 +260,12 @@ class KernelDoc: #: String to write when a parameter is not described. 
undescribed = "-- undescribed --" - def __init__(self, config, fname): + def __init__(self, config, fname, xforms): """Initialize internal variables""" self.fname = fname self.config = config + self.xforms = xforms # Initial state for the state machines self.state = state.NORMAL @@ -883,7 +749,7 @@ class KernelDoc: # Go through the list of members applying all of our transformations. # members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) + members = self.xforms.apply("struct", members) nested = NestedMatch() for search, sub in struct_nested_prefixes: @@ -1009,8 +875,7 @@ class KernelDoc: # Drop comments and macros to have a pure C prototype # if not declaration_name: - for r, sub in var_xforms: - proto = r.sub(sub, proto) + proto = self.xforms.apply("var", proto) proto = proto.rstrip() @@ -1091,7 +956,7 @@ class KernelDoc: # # Apply the initial transformations. # - prototype = apply_transforms(function_xforms, prototype) + prototype = self.xforms.apply("func", prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py new file mode 100644 index 000000000000..e6e0302e5dd0 --- /dev/null +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab . + +import re + +from kdoc.kdoc_re import KernRe + +struct_args_pattern = r'([^,)]+)' + +class CTransforms: + """ + Data class containing a long set of transformations to turn + structure member prefixes, and macro invocations and variables + into something we can parse and generate kdoc for. + """ + + #: Transforms for structs and unions. 
+ struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*__private', re.S), ' '), + (KernRe(r'\s*__rcu', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. 
+ # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parentheses will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), + ] + + #: Transforms for function prototypes. 
+ function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__exit +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__always_unused *"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), + ] + + #: Transforms for variable prototypes. + var_xforms = [ + (KernRe(r"__read_mostly"), ""), + (KernRe(r"__ro_after_init"), ""), + (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), + (KernRe(r"(?://.*)$"), ""), + (KernRe(r"(?:/\*.*\*/)"), ""), + (KernRe(r";$"), ""), + ] + + #: Transforms main dictionary used at apply_transforms(). + xforms = { + "struct": struct_xforms, + "func": function_xforms, + "var": var_xforms, + } + + def apply(self, xforms_type, text): + """ + Apply a set of transforms to a block of text. 
+ """ + if xforms_type not in self.xforms: + return text + + for search, subst in self.xforms[xforms_type]: + text = search.sub(subst, text) + return text -- cgit v1.2.3 From 4ff59bdd93f0e80b5014977502d082c778f96304 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 2 Mar 2026 17:40:56 +0100 Subject: docs: xforms_lists: ignore context analysis and lock attributes Drop context analysis and lock (tracking) attributes to avoid kernel-doc warnings. There are now lots of warnings like these: Documentation/core-api/kref:328: ../include/linux/kref.h:72: WARNING: Invalid C declaration: Expected end of definition. [error at 96] int kref_put_mutex (struct kref *kref, void (*release)(struct kref *kref), struct mutex *mutex) __cond_acquires(true# mutex) ------------------------------------------------------------------------------------------------^ Documentation/core-api/kref:328: ../include/linux/kref.h:94: WARNING: Invalid C declaration: Expected end of definition. [error at 92] int kref_put_lock (struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) __cond_acquires(true# lock) --------------------------------------------------------------------------------------------^ The regex is suggested by Mauro; mine was too greedy. Thanks. Updated context analysis and lock macros list provided by PeterZ. Thanks. 
[mchehab: modified to be applied after xforms_lists split] Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20260107161548.45530e1c@canb.auug.org.au/ Signed-off-by: Randy Dunlap Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <3c7fdfc364a8920f92530b47bdbf4bb29a40371f.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 10 ++++++++++ tools/lib/python/kdoc/xforms_lists.py | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index d7daf658e9d2..503a18212747 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -75,6 +75,16 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' # is allowed. # struct_nested_prefixes = [ + (re.compile(r"__cond_acquires\s*\("), ""), + (re.compile(r"__cond_releases\s*\("), ""), + (re.compile(r"__acquires\s*\("), ""), + (re.compile(r"__releases\s*\("), ""), + (re.compile(r"__must_hold\s*\("), ""), + (re.compile(r"__must_not_hold\s*\("), ""), + (re.compile(r"__must_hold_shared\s*\("), ""), + (re.compile(r"__cond_acquires_shared\s*\("), ""), + (re.compile(r"__acquires_shared\s*\("), ""), + (re.compile(r"__releases_shared\s*\("), ""), (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), ] diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index e6e0302e5dd0..1bda7c4634c3 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -22,6 +22,8 @@ class CTransforms: (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), 
(KernRe(r'\s*__packed\s*', re.S), ' '), (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), (KernRe(r'\s*__private', re.S), ' '), @@ -120,6 +122,7 @@ class CTransforms: (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__no_context_analysis\s*"), ""), (KernRe(r"__attribute_const__ +"), ""), (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), ] @@ -128,6 +131,8 @@ class CTransforms: var_xforms = [ (KernRe(r"__read_mostly"), ""), (KernRe(r"__ro_after_init"), ""), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), -- cgit v1.2.3 From 34503b5fd10d8c7f1b1f4fecb6aae826fcf79424 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:59 +0100 Subject: docs: kdoc_re: Change NestedMath args replacement to \0 Future patches will allow parsing each argument instead of the whole set. Prepare for it by changing the all-args replacement token from \1 to \0. No functional changes. 
Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <46e383118be9d9e432e3814fe819ebb12261d7b4.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 2 +- tools/lib/python/kdoc/kdoc_re.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 503a18212747..0f90c16cb51a 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -85,7 +85,7 @@ struct_nested_prefixes = [ (re.compile(r"__cond_acquires_shared\s*\("), ""), (re.compile(r"__acquires_shared\s*\("), ""), (re.compile(r"__releases_shared\s*\("), ""), - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), + (re.compile(r'\bSTRUCT_GROUP\('), r'\0'), ] # diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 00afa5bccd6d..ea4f6f3d9e42 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -188,7 +188,7 @@ class NestedMatch: # except that the content inside the match group is delimiter-aligned. # # The content inside parentheses is converted into a single replace - # group (e.g. r`\1'). + # group (e.g. r`\0'). # # It would be nice to change such definition to support multiple # match groups, allowing a regex equivalent to: @@ -291,7 +291,7 @@ class NestedMatch: if the sub argument contains:: - r'\1' + r'\0' it will work just like re: it places there the matched paired data with the delimiter stripped. 
@@ -310,9 +310,9 @@ class NestedMatch: # Value, ignoring start/end delimiters value = line[end:pos - 1] - # replaces \1 at the sub string, if \1 is used there + # replaces \0 at the sub string, if \0 is used there new_sub = sub - new_sub = new_sub.replace(r'\1', value) + new_sub = new_sub.replace(r'\0', value) out += new_sub -- cgit v1.2.3 From fc44c0a0b2a72f2e9331063a311a548634ae18af Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:41:00 +0100 Subject: docs: kdoc_re: make NestedMatch use KernRe Instead of using re_compile, let's create the class with the regex and use KernRe to keep it cached. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_parser.py | 25 ++++++++++++------------- tools/lib/python/kdoc/kdoc_re.py | 24 +++++++++++++++++------- 2 files changed, 29 insertions(+), 20 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 0f90c16cb51a..cd9857906a2b 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -75,17 +75,17 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' # is allowed. 
# struct_nested_prefixes = [ - (re.compile(r"__cond_acquires\s*\("), ""), - (re.compile(r"__cond_releases\s*\("), ""), - (re.compile(r"__acquires\s*\("), ""), - (re.compile(r"__releases\s*\("), ""), - (re.compile(r"__must_hold\s*\("), ""), - (re.compile(r"__must_not_hold\s*\("), ""), - (re.compile(r"__must_hold_shared\s*\("), ""), - (re.compile(r"__cond_acquires_shared\s*\("), ""), - (re.compile(r"__acquires_shared\s*\("), ""), - (re.compile(r"__releases_shared\s*\("), ""), - (re.compile(r'\bSTRUCT_GROUP\('), r'\0'), + (NestedMatch(r"__cond_acquires\s*\("), ""), + (NestedMatch(r"__cond_releases\s*\("), ""), + (NestedMatch(r"__acquires\s*\("), ""), + (NestedMatch(r"__releases\s*\("), ""), + (NestedMatch(r"__must_hold\s*\("), ""), + (NestedMatch(r"__must_not_hold\s*\("), ""), + (NestedMatch(r"__must_hold_shared\s*\("), ""), + (NestedMatch(r"__cond_acquires_shared\s*\("), ""), + (NestedMatch(r"__acquires_shared\s*\("), ""), + (NestedMatch(r"__releases_shared\s*\("), ""), + (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), ] # @@ -761,9 +761,8 @@ class KernelDoc: members = trim_private_members(members) members = self.xforms.apply("struct", members) - nested = NestedMatch() for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) + members = search.sub(search, sub, members) # # Deal with embedded struct and union members, and drop enums entirely. # diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index ea4f6f3d9e42..085b89a4547c 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -200,7 +200,10 @@ class NestedMatch: # # FOO(arg1, arg2, arg3) - def _search(self, regex, line): + def __init__(self, regex): + self.regex = KernRe(regex) + + def _search(self, line): """ Finds paired blocks for a regex that ends with a delimiter. 
@@ -222,7 +225,7 @@ class NestedMatch: stack = [] - for match_re in regex.finditer(line): + for match_re in self.regex.finditer(line): start = match_re.start() offset = match_re.end() string_char = None @@ -270,7 +273,7 @@ class NestedMatch: yield start, offset, pos + 1 break - def search(self, regex, line): + def search(self, line): """ This is similar to re.search: @@ -278,12 +281,12 @@ class NestedMatch: returning occurrences only if all delimiters are paired. """ - for t in self._search(regex, line): + for t in self._search(line): yield line[t[0]:t[2]] - def sub(self, regex, sub, line, count=0): - r""" + def sub(self, sub, line, count=0): + """ This is similar to re.sub: It matches a regex that it is followed by a delimiter, @@ -304,7 +307,7 @@ class NestedMatch: cur_pos = 0 n = 0 - for start, end, pos in self._search(regex, line): + for start, end, pos in self._search(line): out += line[cur_pos:start] # Value, ignoring start/end delimiters @@ -331,3 +334,10 @@ class NestedMatch: out += line[cur_pos:l] return out + + def __repr__(self): + """ + Returns a displayable version of the class init. + """ + + return f'NestedMatch("{self.regex.regex.pattern}")' -- cgit v1.2.3 From 85c2a51357f720fabfb6fa8d2551d87a94e797cb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:41:01 +0100 Subject: docs: kdoc_parser: move nested match transforms to xforms_lists.py As NestedMatch now has a sub method and a declaration close to what KernRe does, we can move the rules to xforms_lists and simplify kdoc_parser a little bit. No functional changes. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <762ce2a58ff024c1b0b6f6a6e05020d1415b8308.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 21 --------------------- tools/lib/python/kdoc/xforms_lists.py | 14 +++++++++++++- 2 files changed, 13 insertions(+), 22 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index cd9857906a2b..edf70ba139a5 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -69,25 +69,6 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' r'(?:[-:].*)?$', # description (not captured) cache = False) -# -# Regexes here are guaranteed to have the end delimiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. -# -struct_nested_prefixes = [ - (NestedMatch(r"__cond_acquires\s*\("), ""), - (NestedMatch(r"__cond_releases\s*\("), ""), - (NestedMatch(r"__acquires\s*\("), ""), - (NestedMatch(r"__releases\s*\("), ""), - (NestedMatch(r"__must_hold\s*\("), ""), - (NestedMatch(r"__must_not_hold\s*\("), ""), - (NestedMatch(r"__must_hold_shared\s*\("), ""), - (NestedMatch(r"__cond_acquires_shared\s*\("), ""), - (NestedMatch(r"__acquires_shared\s*\("), ""), - (NestedMatch(r"__releases_shared\s*\("), ""), - (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), -] - # # Ancillary functions # @@ -761,8 +742,6 @@ class KernelDoc: members = trim_private_members(members) members = self.xforms.apply("struct", members) - for search, sub in struct_nested_prefixes: - members = search.sub(search, sub, members) # # Deal with embedded struct and union members, and drop enums entirely. 
# diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 1bda7c4634c3..c07cbe1e6349 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -4,7 +4,7 @@ import re -from kdoc.kdoc_re import KernRe +from kdoc.kdoc_re import KernRe, NestedMatch struct_args_pattern = r'([^,)]+)' @@ -94,6 +94,18 @@ class CTransforms: (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), + + (NestedMatch(r"__cond_acquires\s*\("), ""), + (NestedMatch(r"__cond_releases\s*\("), ""), + (NestedMatch(r"__acquires\s*\("), ""), + (NestedMatch(r"__releases\s*\("), ""), + (NestedMatch(r"__must_hold\s*\("), ""), + (NestedMatch(r"__must_not_hold\s*\("), ""), + (NestedMatch(r"__must_hold_shared\s*\("), ""), + (NestedMatch(r"__cond_acquires_shared\s*\("), ""), + (NestedMatch(r"__acquires_shared\s*\("), ""), + (NestedMatch(r"__releases_shared\s*\("), ""), + (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), ] #: Transforms for function prototypes. -- cgit v1.2.3 From b1e64e30fce86e61d3b09f9352b262622f3f0cda Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:23 +0100 Subject: docs: kdoc: don't add broken comments inside prototypes Parsing a file like drivers/scsi/isci/host.h, which contains broken kernel-doc markups, makes it create a prototype that contains unmatched end comments. That causes, for instance, struct sci_power_control to be shown with this prototype: struct sci_power_control { * it is not. */ bool timer_started; */ struct sci_timer timer; * requesters field. */ u8 phys_waiting; */ u8 phys_granted_power; * mapped into requesters via struct sci_phy.phy_index */ struct isci_phy *requesters[SCI_MAX_PHYS]; }; as comments won't start with "/*" anymore. 
Fix the logic to detect such cases, and keep adding the comments inside it. Signed-off-by: Mauro Carvalho Chehab Message-ID: <18e577dbbd538dcc22945ff139fe3638344e14f0.1773074166.git.mchehab+huawei@kernel.org> Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <12ac4a97e2bd5a19d6537122c10098690c38d2c7.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index edf70ba139a5..086579d00b5c 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1355,6 +1355,12 @@ class KernelDoc: elif doc_content.search(line): self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") self.state = state.PROTO + + # + # Don't let it add partial comments at the code, as breaks the + # logic meant to remove comments from prototypes. + # + self.process_proto_type(ln, "/**\n" + line) # else ... ?? def process_inline_text(self, ln, line): -- cgit v1.2.3 From d5265f7af2d284d5421b763f268157b5fa72f806 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:24 +0100 Subject: docs: kdoc: properly handle empty enum arguments Depending on how the enum proto is written, a comma at the end may incorrectly make kernel-doc parse an arg like " ". Strip spaces before checking if arg is empty. 
Signed-off-by: Mauro Carvalho Chehab Message-ID: <4182bfb7e5f5b4bbaf05cee1bede691e56247eaf.1773074166.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet Message-ID: <640784283d52c5fc52ea597344ecd567e2fb6e22.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 086579d00b5c..4b3c555e6c8e 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -810,9 +810,10 @@ class KernelDoc: member_set = set() members = KernRe(r'\([^;)]*\)').sub('', members) for arg in members.split(','): - if not arg: - continue arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + if not arg.strip(): + continue + self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed -- cgit v1.2.3 From cd77a9aa20ef53a03e5bb2630a5e7b16b910f198 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:26 +0100 Subject: docs: kdoc: use tokenizer to handle comments on structs Better handle comments inside structs. After those changes, all unittests now pass: test_private: TestPublicPrivate: test balanced_inner_private: OK test balanced_non_greddy_private: OK test balanced_private: OK test no private: OK test unbalanced_inner_private: OK test unbalanced_private: OK test unbalanced_struct_group_tagged_with_private: OK test unbalanced_two_struct_group_tagged_first_with_private: OK test unbalanced_without_end_of_line: OK Ran 9 tests This also solves a bug when handling STRUCT_GROUP() with a private comment on it: @@ -397134,7 +397134,7 @@ basic V4L2 device-level support. 
unsigned int max_len; unsigned int offset; struct page_pool_params_slow slow; - STRUCT_GROUP( struct net_device *netdev; + struct net_device *netdev; unsigned int queue_idx; unsigned int flags; }; Signed-off-by: Mauro Carvalho Chehab Message-ID: Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <054763260f7b5459ad0738ed906d7c358d640692.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 4b3c555e6c8e..62d8030cf532 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,6 +13,7 @@ import sys import re from pprint import pformat +from kdoc.c_lex import CTokenizer from kdoc.kdoc_re import NestedMatch, KernRe from kdoc.kdoc_item import KdocItem @@ -84,15 +85,9 @@ def trim_private_members(text): """ Remove ``struct``/``enum`` members that have been marked "private". """ - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. - # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. - # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + + tokens = CTokenizer(text) + return str(tokens) class state: """ -- cgit v1.2.3 From 600079fdcf46fafe15b4ccd62804d66e05309cc6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:34 +0100 Subject: docs: kdoc: replace NestedMatch with CMatch Our previous approach to solve nested structs were to use NestedMatch. It works well, but adding support to parse delimiters is very complex. 
Instead, use CMatch, which uses a C tokenizer, making the code more reliable and simpler. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <900bff66f8093402999f9fe055fbfa3fa33a8d8b.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 2 +- tools/lib/python/kdoc/xforms_lists.py | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 62d8030cf532..efd58c88ff31 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -14,7 +14,7 @@ import re from pprint import pformat from kdoc.c_lex import CTokenizer -from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index c07cbe1e6349..7fa7f52cec7b 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -4,7 +4,8 @@ import re -from kdoc.kdoc_re import KernRe, NestedMatch +from kdoc.kdoc_re import KernRe +from kdoc.c_lex import CMatch struct_args_pattern = r'([^,)]+)' @@ -60,7 +61,7 @@ class CTransforms: # # As it doesn't properly match the end parenthesis on some cases. # - # So, a better solution was crafted: there's now a NestedMatch + # So, a better solution was crafted: there's now a CMatch # class that ensures that delimiters after a search are properly # matched. So, the implementation to drop STRUCT_GROUP() will be # handled in separate. @@ -72,9 +73,9 @@ class CTransforms: # # Replace macros # - # TODO: use NestedMatch for FOO($1, $2, ...) matches + # TODO: use CMatch for FOO($1, $2, ...) 
matches # - # it is better to also move those to the NestedMatch logic, + # it is better to also move those to the CMatch logic, # to ensure that parentheses will be properly matched. # (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), @@ -95,17 +96,17 @@ class CTransforms: (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), - (NestedMatch(r"__cond_acquires\s*\("), ""), - (NestedMatch(r"__cond_releases\s*\("), ""), - (NestedMatch(r"__acquires\s*\("), ""), - (NestedMatch(r"__releases\s*\("), ""), - (NestedMatch(r"__must_hold\s*\("), ""), - (NestedMatch(r"__must_not_hold\s*\("), ""), - (NestedMatch(r"__must_hold_shared\s*\("), ""), - (NestedMatch(r"__cond_acquires_shared\s*\("), ""), - (NestedMatch(r"__acquires_shared\s*\("), ""), - (NestedMatch(r"__releases_shared\s*\("), ""), - (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), + (CMatch(r"__cond_acquires"), ""), + (CMatch(r"__cond_releases"), ""), + (CMatch(r"__acquires"), ""), + (CMatch(r"__releases"), ""), + (CMatch(r"__must_hold"), ""), + (CMatch(r"__must_not_hold"), ""), + (CMatch(r"__must_hold_shared"), ""), + (CMatch(r"__cond_acquires_shared"), ""), + (CMatch(r"__acquires_shared"), ""), + (CMatch(r"__releases_shared"), ""), + (CMatch(r"STRUCT_GROUP"), r'\0'), ] #: Transforms for function prototypes. -- cgit v1.2.3 From 024e200e2a89d71dceff7d1bff8ae77b145726e0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:38 +0100 Subject: docs: c_lex: setup a logger to report tokenizer issues Report the file that has issues detected via CMatch and CTokenizer. This is done by setting up a logger that will be overridden by kdoc_parser, when used on it. 
Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <903ad83ae176196a50444e66177a4f5bcdef5199.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/c_lex.py | 16 ++++++++++++++++ tools/lib/python/kdoc/kdoc_parser.py | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py index 20e50ff0ecd5..b6d58bd470a9 100644 --- a/tools/lib/python/kdoc/c_lex.py +++ b/tools/lib/python/kdoc/c_lex.py @@ -22,6 +22,22 @@ from .kdoc_re import KernRe log = logging.getLogger(__name__) +def tokenizer_set_log(logger, prefix = ""): + """ + Replace the module‑level logger with a LoggerAdapter that + prepends *prefix* to every message. + """ + global log + + class PrefixAdapter(logging.LoggerAdapter): + """ + Ancillary class to set prefix on all message logs. + """ + def process(self, msg, kwargs): + return f"{prefix}{msg}", kwargs + + # Wrap the provided logger in our adapter + log = PrefixAdapter(logger, {"prefix": prefix}) class CToken(): """ diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index efd58c88ff31..f90c6dd0343d 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,7 +13,7 @@ import sys import re from pprint import pformat -from kdoc.c_lex import CTokenizer +from kdoc.c_lex import CTokenizer, tokenizer_set_log from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem @@ -253,6 +253,8 @@ class KernelDoc: self.config = config self.xforms = xforms + tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") + # Initial state for the state machines self.state = state.NORMAL -- cgit v1.2.3 From 12aa7753ff4c5fea405d139bcf67f49bda2c932e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:40 +0100 Subject: docs: kdoc: ensure that comments are dropped before 
calling split_struct_proto() Changeset 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes") revealed a hidden bug at split_struct_proto(): some comments there may break its capability of properly identifying a struct. Fixing it is as simple as stripping comments before calling it. Fixes: 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index f90c6dd0343d..8b2c9d0f0c58 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -723,6 +723,7 @@ class KernelDoc: # # Do the basic parse to get the pieces of the declaration. # + proto = trim_private_members(proto) struct_parts = self.split_struct_proto(proto) if not struct_parts: self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") @@ -763,6 +764,7 @@ class KernelDoc: # Strip preprocessor directives. Note that this depends on the # trailing semicolon we added in process_proto_type(). # + proto = trim_private_members(proto) proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) # # Parse out the name and members of the enum. Typedef form first. @@ -770,7 +772,7 @@ class KernelDoc: r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) - members = trim_private_members(r.group(1)) + members = r.group(1) # # Failing that, look for a straight enum # @@ -778,7 +780,7 @@ class KernelDoc: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) - members = trim_private_members(r.group(2)) + members = r.group(2) # # OK, this isn't going to work. 
# -- cgit v1.2.3 From 79d881beb721d27f679f0dc1cba2d5fe2d7f6d8d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:41 +0100 Subject: docs: kdoc_parser: avoid tokenizing structs everytime Most of the rules inside CTransforms are of the type CMatch. Don't re-parse the source code every time. Doing this doesn't change the output, but makes kdoc almost as fast as before the tokenizer patches: # Before tokenizer patches $ time ./scripts/kernel-doc . -man >original 2>&1 real 0m42.933s user 0m36.523s sys 0m1.145s # After tokenizer patches $ time ./scripts/kernel-doc . -man >before 2>&1 real 1m29.853s user 1m23.974s sys 0m1.237s # After this patch $ time ./scripts/kernel-doc . -man >after 2>&1 real 0m48.579s user 0m45.938s sys 0m0.988s $ diff -s before after Files before and after are identical Manually checked the differences between original and after with: $ diff -U0 -prBw original after|grep -v Warning|grep -v "@@"|less They're due: - whitespace fixes; - struct_group are now better handled; - several badly-generated man pages from broken inline kernel-doc markups are now fixed. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <1cc2a4286ebf7d4b2d03fcaf42a1ba9fa09004b9.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 1 - tools/lib/python/kdoc/xforms_lists.py | 30 ++++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 8b2c9d0f0c58..f6c4ee3b18c9 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -737,7 +737,6 @@ class KernelDoc: # # Go through the list of members applying all of our transformations. 
# - members = trim_private_members(members) members = self.xforms.apply("struct", members) # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 2056572852fd..5a62d4a450cb 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -5,7 +5,7 @@ import re from kdoc.kdoc_re import KernRe -from kdoc.c_lex import CMatch +from kdoc.c_lex import CMatch, CTokenizer struct_args_pattern = r'([^,)]+)' @@ -16,6 +16,12 @@ class CTransforms: into something we can parse and generate kdoc for. """ + # + # NOTE: + # Due to performance reasons, place CMatch rules before KernRe, + # as this avoids running the C parser every time. + # + #: Transforms for structs and unions. struct_xforms = [ # Strip attributes @@ -124,13 +130,25 @@ class CTransforms: "var": var_xforms, } - def apply(self, xforms_type, text): + def apply(self, xforms_type, source): """ - Apply a set of transforms to a block of text. + Apply a set of transforms to a block of source. + + As tokenizer is used here, this function also remove comments + at the end. """ if xforms_type not in self.xforms: - return text + return source + + if isinstance(source, str): + source = CTokenizer(source) for search, subst in self.xforms[xforms_type]: - text = search.sub(subst, text) - return text + # + # KernRe only accept strings. + # + if isinstance(search, KernRe): + source = str(source) + + source = search.sub(subst, source) + return str(source) -- cgit v1.2.3 From e394855fcc897f73f23c364a3a596b54cc879e4c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:08 +0100 Subject: docs: kdoc_item: fix a typo on sections_start_lines Currently, there are 15 occurrences of section?_start_lines, with 10 using the plural way. This is an issue, as, while kdoc_output works with KdocItem, the term doesn't match its init value. The variable sections_start_lines stores multiple sections, so placing it in plural is its correct way. 
So, ensure that, on all parts of kdoc, this will be referred as sections_start_lines. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_item.py | 2 +- tools/lib/python/kdoc/kdoc_output.py | 2 +- tools/lib/python/kdoc/kdoc_parser.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py index 5f41790efacb..fe08cac861c2 100644 --- a/tools/lib/python/kdoc/kdoc_item.py +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -82,7 +82,7 @@ class KdocItem: Set sections and start lines. """ self.sections = sections - self.section_start_lines = start_lines + self.sections_start_lines = start_lines def set_params(self, names, descs, types, starts): """ diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 73d71cbeabb5..1b54117dbe19 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -389,7 +389,7 @@ class RestFormat(OutputFormat): else: self.data += f'{self.lineprefix}**{section}**\n\n' - self.print_lineno(args.section_start_lines.get(section, 0)) + self.print_lineno(args.sections_start_lines.get(section, 0)) self.output_highlight(text) self.data += "\n" self.data += "\n" diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index f6c4ee3b18c9..35658a7e72d5 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -140,7 +140,7 @@ class KernelEntry: self.parametertypes = {} self.parameterdesc_start_lines = {} - self.section_start_lines = {} + self.sections_start_lines = {} self.sections = {} self.anon_struct_union = False @@ -220,7 +220,7 @@ class KernelEntry: self.sections[name] += '\n' + contents else: self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line + self.sections_start_lines[name] = 
self.new_start_line self.new_start_line = 0 # self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) @@ -316,7 +316,7 @@ class KernelDoc: for section in ["Description", "Return"]: if section in sections and not sections[section].rstrip(): del sections[section] - item.set_sections(sections, self.entry.section_start_lines) + item.set_sections(sections, self.entry.sections_start_lines) item.set_params(self.entry.parameterlist, self.entry.parameterdescs, self.entry.parametertypes, self.entry.parameterdesc_start_lines) -- cgit v1.2.3 From 01d6d7bf9672f1aeabbffaa3fbfb8017223ff878 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:14 +0100 Subject: docs: kernel-doc: add support to store output on a YAML file Add a command line parameter and library support to optionally store: - KdocItem intermediate format after parsing; - man pages output; - rst output. inside a YAML file. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/docs/kernel-doc | 48 ++++++++++++++++++++++++++++++------ tools/lib/python/kdoc/kdoc_files.py | 47 ++++++++++++++++++++++++++++++----- tools/lib/python/kdoc/kdoc_parser.py | 27 +++++++++++++++++++- 3 files changed, 107 insertions(+), 15 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/docs/kernel-doc b/tools/docs/kernel-doc index 3a932f95bdf5..d9192c3f1645 100755 --- a/tools/docs/kernel-doc +++ b/tools/docs/kernel-doc @@ -240,11 +240,9 @@ def main(): help=EXPORT_FILE_DESC) # - # Output format mutually-exclusive group + # Output format # - out_group = parser.add_argument_group("Output format selection (mutually exclusive)") - - out_fmt = out_group.add_mutually_exclusive_group() + out_fmt = parser.add_argument_group("Output format selection (mutually exclusive)") out_fmt.add_argument("-m", "-man", "--man", action="store_true", help="Output troff manual page format.") @@ -253,6 +251,12 @@ def main(): out_fmt.add_argument("-N", "-none", 
"--none", action="store_true", help="Do not output documentation, only warnings.") + out_fmt.add_argument("-y", "--yaml-file", "--yaml", + help="Stores kernel-doc output on a yaml file.") + out_fmt.add_argument("-k", "--kdoc-item", "--kdoc", action="store_true", + help="Store KdocItem inside yaml file. Ued together with --yaml.") + + # # Output selection mutually-exclusive group # @@ -323,14 +327,42 @@ def main(): from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 from kdoc.kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 - if args.man: - out_style = ManFormat(modulename=args.modulename) - elif args.none: + yaml_content = set() + if args.yaml_file: out_style = None + + if args.man: + yaml_content |= {"man"} + + if args.rst: + yaml_content |= {"rst"} + + if args.kdoc_item or not yaml_content: + yaml_content |= {"KdocItem"} + else: - out_style = RestFormat() + n_outputs = 0 + + if args.man: + out_style = ManFormat(modulename=args.modulename) + n_outputs += 1 + + if args.none: + out_style = None + n_outputs += 1 + + if args.rst or n_outputs == 0: + n_outputs += 1 + out_style = RestFormat() + + if n_outputs > 1: + parser.error("Those arguments are muttually exclusive: --man, --rst, --none, except when generating a YAML file.") + + elif not n_outputs: + out_style = RestFormat() kfiles = KernelFiles(verbose=args.verbose, + yaml_file=args.yaml_file, yaml_content=yaml_content, out_style=out_style, werror=args.werror, wreturn=args.wreturn, wshort_desc=args.wshort_desc, wcontents_before_sections=args.wcontents_before_sections) diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 58f4ee08e226..5a299ed44d62 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -16,6 +16,7 @@ import re from kdoc.kdoc_parser import KernelDoc from kdoc.xforms_lists import CTransforms from kdoc.kdoc_output import OutputFormat +from kdoc.kdoc_yaml_file import KDocTestFile class 
GlobSourceFiles: @@ -152,6 +153,12 @@ class KernelFiles(): If not specified, defaults to use: ``logging.getLogger("kernel-doc")`` + ``yaml_file`` + If defined, stores the output inside a YAML file. + + ``yaml_content`` + Defines what will be inside the YAML file. + Note: There are two type of parsers defined here: @@ -181,7 +188,12 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname, self.xforms) + if self.test_file: + store_src = True + else: + store_src = False + + doc = KernelDoc(self.config, fname, self.xforms, store_src=store_src) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table @@ -191,6 +203,10 @@ class KernelFiles(): self.results[fname] = entries + source = doc.get_source() + if source: + self.source[fname] = source + def process_export_file(self, fname): """ Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. @@ -220,7 +236,7 @@ class KernelFiles(): def __init__(self, verbose=False, out_style=None, xforms=None, werror=False, wreturn=False, wshort_desc=False, wcontents_before_sections=False, - logger=None): + yaml_file=None, yaml_content=None, logger=None): """ Initialize startup variables and parse all files. 
""" @@ -259,6 +275,11 @@ class KernelFiles(): # Override log warning, as we want to count errors self.config.warning = self.warning + if yaml_file: + self.test_file = KDocTestFile(self.config, yaml_file, yaml_content) + else: + self.test_file = None + if xforms: self.xforms = xforms else: @@ -273,6 +294,7 @@ class KernelFiles(): self.errors = 0 self.results = {} + self.source = {} self.files = set() self.export_files = set() @@ -331,16 +353,29 @@ class KernelFiles(): for s in symbol: function_table.add(s) - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - if fname not in self.results: self.config.log.warning("No kernel-doc for file %s", fname) continue symbols = self.results[fname] + if self.test_file: + self.test_file.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + + self.test_file.output_symbols(fname, symbols, + self.source.get(fname)) + + continue + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + msg = self.out_style.output_symbols(fname, symbols) if msg: yield fname, msg + + if self.test_file: + self.test_file.write() diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 35658a7e72d5..a10e64589d76 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -246,12 +246,13 @@ class KernelDoc: #: String to write when a parameter is not described. 
undescribed = "-- undescribed --" - def __init__(self, config, fname, xforms): + def __init__(self, config, fname, xforms, store_src=False): """Initialize internal variables""" self.fname = fname self.config = config self.xforms = xforms + self.store_src = store_src tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") @@ -264,6 +265,9 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] + # When store_src is true, the kernel-doc source content is stored here + self.source = None + # # We need Python 3.7 for its "dicts remember the insertion # order" guarantee @@ -1592,6 +1596,15 @@ class KernelDoc: state.DOCBLOCK: process_docblock, } + def get_source(self): + """ + Return the file content of the lines handled by kernel-doc at the + latest parse_kdoc() run. + + Returns none if KernelDoc() was not initialized with store_src, + """ + return self.source + def parse_kdoc(self): """ Open and process each line of a C source file. @@ -1605,6 +1618,8 @@ class KernelDoc: prev = "" prev_ln = None export_table = set() + self.source = [] + self.state = state.NORMAL try: with open(self.fname, "r", encoding="utf8", @@ -1631,6 +1646,8 @@ class KernelDoc: ln, state.name[self.state], line) + prev_state = self.state + # This is an optimization over the original script. # There, when export_file was used for the same file, # it was read twice. Here, we use the already-existing @@ -1641,6 +1658,14 @@ class KernelDoc: # Hand this line to the appropriate state handler self.state_actions[self.state](self, ln, line) + if self.store_src and prev_state != self.state or self.state != state.NORMAL: + if self.state == state.NAME: + # A "/**" was detected. 
Add a new source element + self.source.append({"ln": ln, "data": line + "\n"}) + else: + # Append to the existing one + self.source[-1]["data"] += line + "\n" + self.emit_unused_warnings() except OSError: -- cgit v1.2.3 From 99ec67a9984fdf38c7ed78695aeb1b99cfee5b50 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 23 Mar 2026 10:10:50 +0100 Subject: docs: kdoc: better handle source when producing YAML output The current logic was storing the symbols' source code in a list that was not linked to the actual KdocItem. While this works fine when the kernel-doc markups are OK, in places where there is a "/**" without a valid kernel-doc markup, the 1:1 match between source code and KdocItem is lost, causing problems when generating the YAML output. Fix it by storing the source code directly in the KdocItem structure. This shouldn't affect performance or memory footprint, except when the --yaml option is used. While here, add a __repr__() function for KdocItem, as it helps with debugging.
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <77902dafabb5c3250486aa2dc1568d5fafa95c5b.1774256269.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_files.py | 8 +-- tools/lib/python/kdoc/kdoc_item.py | 6 +- tools/lib/python/kdoc/kdoc_parser.py | 100 ++++++++++++++++---------------- tools/lib/python/kdoc/kdoc_yaml_file.py | 28 ++++----- tools/unittests/test_kdoc_parser.py | 9 +++ 5 files changed, 79 insertions(+), 72 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 5a299ed44d62..2428cfc4e843 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -203,10 +203,6 @@ class KernelFiles(): self.results[fname] = entries - source = doc.get_source() - if source: - self.source[fname] = source - def process_export_file(self, fname): """ Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. @@ -294,7 +290,6 @@ class KernelFiles(): self.errors = 0 self.results = {} - self.source = {} self.files = set() self.export_files = set() @@ -364,8 +359,7 @@ class KernelFiles(): function_table, enable_lineno, no_doc_sections) - self.test_file.output_symbols(fname, symbols, - self.source.get(fname)) + self.test_file.output_symbols(fname, symbols) continue diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py index fe08cac861c2..a7aa6e1e4c1c 100644 --- a/tools/lib/python/kdoc/kdoc_item.py +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -14,7 +14,8 @@ class KdocItem: then pass into the output modules. 
""" - def __init__(self, name, fname, type, start_line, **other_stuff): + def __init__(self, name, fname, type, start_line, + **other_stuff): self.name = name self.fname = fname self.type = type @@ -60,6 +61,9 @@ class KdocItem: def __getitem__(self, key): return self.get(key) + def __repr__(self): + return f"KdocItem({self.name}, {self.fname}, {self.type}, {self.declaration_start_line})" + @classmethod def from_dict(cls, d): """Create a KdocItem from a plain dict.""" diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index a10e64589d76..74af7ae47aa4 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -265,9 +265,6 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - # When store_src is true, the kernel-doc source content is stored here - self.source = None - # # We need Python 3.7 for its "dicts remember the insertion # order" guarantee @@ -720,13 +717,14 @@ class KernelDoc: return declaration - def dump_struct(self, ln, proto): + def dump_struct(self, ln, proto, source): """ Store an entry for a ``struct`` or ``union`` """ # # Do the basic parse to get the pieces of the declaration. # + source = source proto = trim_private_members(proto) struct_parts = self.split_struct_proto(proto) if not struct_parts: @@ -756,10 +754,11 @@ class KernelDoc: declaration_name) self.check_sections(ln, declaration_name, decl_type) self.output_declaration(decl_type, declaration_name, + source=source, definition=self.format_struct_decl(declaration), purpose=self.entry.declaration_purpose) - def dump_enum(self, ln, proto): + def dump_enum(self, ln, proto, source): """ Store an ``enum`` inside self.entries array. """ @@ -767,6 +766,7 @@ class KernelDoc: # Strip preprocessor directives. Note that this depends on the # trailing semicolon we added in process_proto_type(). 
# + source = source proto = trim_private_members(proto) proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) # @@ -831,9 +831,10 @@ class KernelDoc: f"Excess enum value '@{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, + source=source, purpose=self.entry.declaration_purpose) - def dump_var(self, ln, proto): + def dump_var(self, ln, proto, source): """ Store variables that are part of kAPI. """ @@ -846,6 +847,7 @@ class KernelDoc: # # Store the full prototype before modifying it # + source = source full_proto = proto declaration_name = None @@ -895,32 +897,34 @@ class KernelDoc: default_val = default_val.lstrip("=").strip() self.output_declaration("var", declaration_name, + source=source, full_proto=full_proto, default_val=default_val, purpose=self.entry.declaration_purpose) - def dump_declaration(self, ln, prototype): + def dump_declaration(self, ln, prototype, source): """ Store a data declaration inside self.entries array. """ if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) + self.dump_enum(ln, prototype, source) elif self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) + self.dump_typedef(ln, prototype, source) elif self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) + self.dump_struct(ln, prototype, source) elif self.entry.decl_type == "var": - self.dump_var(ln, prototype) + self.dump_var(ln, prototype, source) else: # This would be a bug self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') - def dump_function(self, ln, prototype): + def dump_function(self, ln, prototype, source): """ Store a function or function macro inside self.entries array. """ + source = source found = func_macro = False return_type = '' decl_type = 'function' @@ -1013,13 +1017,14 @@ class KernelDoc: # Store the result. 
# self.output_declaration(decl_type, declaration_name, + source=source, typedef=('typedef' in return_type), functiontype=return_type, purpose=self.entry.declaration_purpose, func_macro=func_macro) - def dump_typedef(self, ln, proto): + def dump_typedef(self, ln, proto, source): """ Store a ``typedef`` inside self.entries array. """ @@ -1030,6 +1035,8 @@ class KernelDoc: typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' + source = source + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) @@ -1050,6 +1057,7 @@ class KernelDoc: self.create_parameter_list(ln, 'function', args, ',', declaration_name) self.output_declaration('function', declaration_name, + source=source, typedef=True, functiontype=return_type, purpose=self.entry.declaration_purpose) @@ -1067,6 +1075,7 @@ class KernelDoc: return self.output_declaration('typedef', declaration_name, + source=source, purpose=self.entry.declaration_purpose) return @@ -1104,7 +1113,7 @@ class KernelDoc: function_set.add(symbol) return True - def process_normal(self, ln, line): + def process_normal(self, ln, line, source): """ STATE_NORMAL: looking for the ``/**`` to begin everything. """ @@ -1118,7 +1127,7 @@ class KernelDoc: # next line is always the function name self.state = state.NAME - def process_name(self, ln, line): + def process_name(self, ln, line, source): """ STATE_NAME: Looking for the "name - description" line """ @@ -1251,7 +1260,7 @@ class KernelDoc: return False - def process_decl(self, ln, line): + def process_decl(self, ln, line, source): """ STATE_DECLARATION: We've seen the beginning of a declaration. """ @@ -1280,7 +1289,7 @@ class KernelDoc: self.emit_msg(ln, f"bad line: {line}") - def process_special(self, ln, line): + def process_special(self, ln, line, source): """ STATE_SPECIAL_SECTION: a section ending with a blank line. 
""" @@ -1331,7 +1340,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_body(self, ln, line): + def process_body(self, ln, line, source): """ STATE_BODY: the bulk of a kerneldoc comment. """ @@ -1345,7 +1354,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_inline_name(self, ln, line): + def process_inline_name(self, ln, line, source): """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" if doc_inline_sect.search(line): @@ -1363,10 +1372,10 @@ class KernelDoc: # Don't let it add partial comments at the code, as breaks the # logic meant to remove comments from prototypes. # - self.process_proto_type(ln, "/**\n" + line) + self.process_proto_type(ln, "/**\n" + line, source) # else ... ?? - def process_inline_text(self, ln, line): + def process_inline_text(self, ln, line, source): """STATE_INLINE_TEXT: docbook comments within a prototype.""" if doc_inline_end.search(line): @@ -1452,7 +1461,7 @@ class KernelDoc: return proto - def process_proto_function(self, ln, line): + def process_proto_function(self, ln, line, source): """Ancillary routine to process a function prototype.""" # strip C99-style comments to end of line @@ -1494,10 +1503,10 @@ class KernelDoc: # # ... and we're done # - self.dump_function(ln, self.entry.prototype) + self.dump_function(ln, self.entry.prototype, source) self.reset_state(ln) - def process_proto_type(self, ln, line): + def process_proto_type(self, ln, line, source): """ Ancillary routine to process a type. 
""" @@ -1527,7 +1536,7 @@ class KernelDoc: elif chunk == '}': self.entry.brcount -= 1 elif chunk == ';' and self.entry.brcount <= 0: - self.dump_declaration(ln, self.entry.prototype) + self.dump_declaration(ln, self.entry.prototype, source) self.reset_state(ln) return # @@ -1536,7 +1545,7 @@ class KernelDoc: # self.entry.prototype += ' ' - def process_proto(self, ln, line): + def process_proto(self, ln, line, source): """STATE_PROTO: reading a function/whatever prototype.""" if doc_inline_oneline.search(line): @@ -1548,17 +1557,18 @@ class KernelDoc: self.state = state.INLINE_NAME elif self.entry.decl_type == 'function': - self.process_proto_function(ln, line) + self.process_proto_function(ln, line, source) else: - self.process_proto_type(ln, line) + self.process_proto_type(ln, line, source) - def process_docblock(self, ln, line): + def process_docblock(self, ln, line, source): """STATE_DOCBLOCK: within a ``DOC:`` block.""" if doc_end.search(line): self.dump_section() - self.output_declaration("doc", self.entry.identifier) + self.output_declaration("doc", self.entry.identifier, + source=source) self.reset_state(ln) elif doc_content.search(line): @@ -1596,15 +1606,6 @@ class KernelDoc: state.DOCBLOCK: process_docblock, } - def get_source(self): - """ - Return the file content of the lines handled by kernel-doc at the - latest parse_kdoc() run. - - Returns none if KernelDoc() was not initialized with store_src, - """ - return self.source - def parse_kdoc(self): """ Open and process each line of a C source file. 
@@ -1618,8 +1619,8 @@ class KernelDoc: prev = "" prev_ln = None export_table = set() - self.source = [] self.state = state.NORMAL + source = "" try: with open(self.fname, "r", encoding="utf8", @@ -1646,7 +1647,11 @@ class KernelDoc: ln, state.name[self.state], line) - prev_state = self.state + if self.store_src: + if source and self.state == state.NORMAL: + source = "" + elif self.state != state.NORMAL: + source += line + "\n" # This is an optimization over the original script. # There, when export_file was used for the same file, @@ -1655,16 +1660,11 @@ class KernelDoc: # if (self.state != state.NORMAL) or \ not self.process_export(export_table, line): + prev_state = self.state # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) - - if self.store_src and prev_state != self.state or self.state != state.NORMAL: - if self.state == state.NAME: - # A "/**" was detected. Add a new source element - self.source.append({"ln": ln, "data": line + "\n"}) - else: - # Append to the existing one - self.source[-1]["data"] += line + "\n" + self.state_actions[self.state](self, ln, line, source) + if prev_state == state.NORMAL and self.state != state.NORMAL: + source += line + "\n" self.emit_unused_warnings() diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py index 18737abb1176..1e2ae7c59d70 100644 --- a/tools/lib/python/kdoc/kdoc_yaml_file.py +++ b/tools/lib/python/kdoc/kdoc_yaml_file.py @@ -85,7 +85,7 @@ class KDocTestFile(): return d - def output_symbols(self, fname, symbols, source): + def output_symbols(self, fname, symbols): """ Store source, symbols and output strings at self.tests. """ @@ -96,16 +96,10 @@ class KDocTestFile(): kdoc_item = [] expected = [] - if not symbols and not source: - return - - if not source or len(symbols) != len(source): - print(f"Warning: lengths are different. Ignoring {fname}") - - # Folding without line numbers is too hard. 
- # The right thing to do here to proceed would be to delete - # not-handled source blocks, as len(source) should be bigger - # than len(symbols) + # + # Source code didn't produce any symbol + # + if not symbols: return base_name = "test_" + fname.replace(".", "_").replace("/", "_") @@ -115,9 +109,15 @@ class KDocTestFile(): for i in range(0, len(symbols)): arg = symbols[i] - if "KdocItem" in self.yaml_content: + source = arg.get("source", "") + + if arg and "KdocItem" in self.yaml_content: msg = self.get_kdoc_item(arg) + other_stuff = msg.get("other_stuff", {}) + if "source" in other_stuff: + del other_stuff["source"] + expected_dict["kdoc_item"] = msg for out_style in self.out_style: @@ -132,9 +132,9 @@ class KDocTestFile(): test = { "name": name, - "description": f"{fname} line {source[i]["ln"]}", + "description": f"{fname} line {arg.declaration_start_line}", "fname": fname, - "source": source[i]["data"], + "source": source, "expected": [expected_dict] } diff --git a/tools/unittests/test_kdoc_parser.py b/tools/unittests/test_kdoc_parser.py index f2250ef192ce..c4a76ed13dbc 100755 --- a/tools/unittests/test_kdoc_parser.py +++ b/tools/unittests/test_kdoc_parser.py @@ -167,7 +167,16 @@ class GenerateKdocItem(unittest.TestCase): self.assertIsInstance(entry, KdocItem) d = vars(entry) + + other_stuff = d.get("other_stuff", {}) + if "source" in other_stuff: + del other_stuff["source"] + for key, value in expected.items(): + if key == "other_stuff": + if "source" in value: + del value["source"] + result = clean_whitespc(d[key], relax_whitespace) value = clean_whitespc(value, relax_whitespace) -- cgit v1.2.3 From 6fa6b5cb60490db2591bb93872b95f72315e5f53 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 18 Apr 2026 12:21:37 -0700 Subject: docs: kdoc: Expand 'at_least' when creating parameter list sphinx doesn't know that the kernel headers do: #define at_least static Do this replacement before declarations are passed to it. 
This prevents errors like the following from appearing once the lib/crypto/ kernel-doc is wired up to the sphinx build: linux/Documentation/crypto/libcrypto:128: ./include/crypto/sha2.h:773: WARNING: Error in declarator or parameters Error in declarator or parameters Invalid C declaration: Expected ']' in end of array operator. [error at 59] void sha512_final (struct sha512_ctx *ctx, u8 out[at_least SHA512_DIGEST_SIZE]) Acked-by: Jonathan Corbet Reviewed-by: Ard Biesheuvel Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20260418192138.15556-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- tools/lib/python/kdoc/kdoc_parser.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index ca00695b47b3..901e02e3c043 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -571,6 +571,11 @@ class KernelDoc: # Ignore argument attributes arg = KernRe(r'\sPOS0?\s').sub(' ', arg) + # Replace '[at_least ' with '[static '. This allows sphinx to parse + # array parameter declarations like 'char A[at_least 4]', where + # 'at_least' is #defined to 'static' by the kernel headers. + arg = arg.replace('[at_least ', '[static ') + # Strip leading/trailing spaces arg = arg.strip() arg = KernRe(r'\s+').sub(' ', arg, count=1) -- cgit v1.2.3