From 77e6e17e9fc4cb4e59ad97de5453bb6f963a5fd4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:46 +0100
Subject: docs: kdoc_parser: move var transformers to the beginning

Just like functions and structs had their transform variables
placed at the beginning, move variable transforms to there
as well.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <491b290252a308f381f88353a3bbe9e2bd1f6a62.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index ca00695b47b3..68a5aea9175d 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -192,6 +192,18 @@ function_xforms  = [
     (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
 ]
 
+#
+# Transforms for variable prototypes
+#
+var_xforms = [
+    (KernRe(r"__read_mostly"), ""),
+    (KernRe(r"__ro_after_init"), ""),
+    (KernRe(r"(?://.*)$"), ""),
+    (KernRe(r"(?:/\*.*\*/)"), ""),
+    (KernRe(r";$"), ""),
+    (KernRe(r"=.*"), ""),
+]
+
 #
 # Ancillary functions
 #
@@ -972,15 +984,6 @@ class KernelDoc:
         ]
         OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?"
 
-        sub_prefixes = [
-            (KernRe(r"__read_mostly"), ""),
-            (KernRe(r"__ro_after_init"), ""),
-            (KernRe(r"(?://.*)$"), ""),
-            (KernRe(r"(?:/\*.*\*/)"), ""),
-            (KernRe(r";$"), ""),
-            (KernRe(r"=.*"), ""),
-        ]
-
         #
         # Store the full prototype before modifying it
         #
@@ -1004,7 +1007,7 @@ class KernelDoc:
         # Drop comments and macros to have a pure C prototype
         #
         if not declaration_name:
-            for r, sub in sub_prefixes:
+            for r, sub in var_xforms:
                 proto = r.sub(sub, proto)
 
         proto = proto.rstrip()
-- 
cgit v1.2.3


From cca1bbdd72f72a3cf86d90fd6f326fd709ae931f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:47 +0100
Subject: docs: kdoc_parser: don't mangle with function defines

Mangling with #defines is not nice, as we may end removing
the macro names, preventing several macros from being properly
documented.

Also, on defines, we have something like:

	#define foo(a1, a2, a3, ...)			 \
		/* some real implementation */

The prototype part (first line on this example) won't contain
any macros, so no need to apply any regexes on it.

With that, move the apply_transforms() logic to ensure that
it will be called only on functions.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <8f9854c8ca1c794b6a3fe418f7adbc32aa68b432.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 68a5aea9175d..9643ffb7584a 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -163,7 +163,7 @@ struct_nested_prefixes = [
 #
 # Transforms for function prototypes
 #
-function_xforms  = [
+function_xforms = [
     (KernRe(r"^static +"), ""),
     (KernRe(r"^extern +"), ""),
     (KernRe(r"^asmlinkage +"), ""),
@@ -1066,10 +1066,7 @@ class KernelDoc:
         found = func_macro = False
         return_type = ''
         decl_type = 'function'
-        #
-        # Apply the initial transformations.
-        #
-        prototype = apply_transforms(function_xforms, prototype)
+
         #
         # If we have a macro, remove the "#define" at the front.
         #
@@ -1088,6 +1085,11 @@ class KernelDoc:
                 declaration_name = r.group(1)
                 func_macro = True
                 found = True
+        else:
+            #
+            # Apply the initial transformations.
+            #
+            prototype = apply_transforms(function_xforms, prototype)
 
         # Yes, this truly is vile.  We are looking for:
         # 1. Return type (may be nothing if we're looking at a macro)
-- 
cgit v1.2.3


From 4fd349f03dc51bc2f9cd2ea9f6309b0bc2b848ca Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:48 +0100
Subject: docs: kdoc_parser: fix variable regexes to work with size_t

The regular expressions meant to pick variable types are too
naive: they forgot that the type word may contain underlines.

It also means that we need to change the regex which detects
var attributes to handle "const".

Co-developed-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <8230715239929cf9d475ab81ca1df7de65d82d06.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 9643ffb7584a..9c9443281c40 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -981,8 +981,9 @@ class KernelDoc:
         """
         VAR_ATTRIBS = [
             "extern",
+            "const",
         ]
-        OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?"
+        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"
 
         #
         # Store the full prototype before modifying it
@@ -1018,14 +1019,14 @@ class KernelDoc:
 
         default_val = None
 
-        r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+        r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
         if r.match(proto):
             if not declaration_name:
                 declaration_name = r.group(1)
 
             default_val = r.group(2)
         else:
-            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
         if r.match(proto):
             default_val = r.group(1)
 
-- 
cgit v1.2.3


From 9bbf22b87d866fa1e6a1f9f6376d2ef458b6dcc7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:49 +0100
Subject: docs: kdoc_parser: fix the default_value logic for variables

The indentation is wrong for the second regex, which causes
problems on variables with defaults.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <681f18338abd6ae33cb9c15d72bb31a1cba75a9a.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 9c9443281c40..4bf55244870f 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -1027,9 +1027,9 @@ class KernelDoc:
             default_val = r.group(2)
         else:
             r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
-        if r.match(proto):
-            default_val = r.group(1)
 
+            if r.match(proto):
+                default_val = r.group(1)
         if not declaration_name:
            self.emit_msg(ln,f"{proto}: can't parse variable")
            return
-- 
cgit v1.2.3


From b7dc635459ad5b00f2d482406dbdca3291622ce2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:50 +0100
Subject: docs: kdoc_parser: don't exclude defaults from prototype

If we do that, the defaults won't be parsed.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <cedf2a819846d2f082388e9ba3d95047c35df6fd.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 4bf55244870f..39ff27d421eb 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -201,7 +201,6 @@ var_xforms = [
     (KernRe(r"(?://.*)$"), ""),
     (KernRe(r"(?:/\*.*\*/)"), ""),
     (KernRe(r";$"), ""),
-    (KernRe(r"=.*"), ""),
 ]
 
 #
-- 
cgit v1.2.3


From 6d9c2e9575b8630e17571a77eef8ade84a2a6344 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:51 +0100
Subject: docs: kdoc_parser: fix parser to support multi-word types

The regular expression currently expects a single word for the
type, but it may be something like  "struct foo".

Add support for it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <544c73a9e670b6fef1828bf4f2ba0de7d29d8675.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 39ff27d421eb..22a820d33dc8 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -1018,14 +1018,14 @@ class KernelDoc:
 
         default_val = None
 
-        r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+        r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
         if r.match(proto):
             if not declaration_name:
                 declaration_name = r.group(1)
 
             default_val = r.group(2)
         else:
-            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
 
             if r.match(proto):
                 default_val = r.group(1)
-- 
cgit v1.2.3


From 9bff5121fe22fdd0bb5bd6f744e136ec20bf7b95 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:52 +0100
Subject: docs: kdoc_parser: add support for LIST_HEAD

Convert LIST_HEAD into struct list_head when handling its
prototype.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <8bdfa6ba6002b0a73a83660f0ce7b40e30124552.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 22a820d33dc8..1df869061bf3 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -198,6 +198,7 @@ function_xforms = [
 var_xforms = [
     (KernRe(r"__read_mostly"), ""),
     (KernRe(r"__ro_after_init"), ""),
+    (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
     (KernRe(r"(?://.*)$"), ""),
     (KernRe(r"(?:/\*.*\*/)"), ""),
     (KernRe(r";$"), ""),
-- 
cgit v1.2.3


From 97d4e70bc2c6f75911a9a5e1a75f2de13fde9b6b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 2 Mar 2026 17:40:53 +0100
Subject: docs: kdoc_parser: handle struct member macro
 VIRTIO_DECLARE_FEATURES(name)

Parse the macro VIRTIO_DECLARE_FEATURES(name) and expand it to its
definition. These prevents one build warning:

WARNING: include/linux/virtio.h:188 struct member 'VIRTIO_DECLARE_FEATURES(features' not described in 'virtio_device'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <6f62e1f1210e74906fa50f4e937f66f54813661b.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 1df869061bf3..917e4528bfbf 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -150,6 +150,7 @@ struct_xforms = [
             struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
     (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
     (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
+    (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
 ]
 #
 # Regexes here are guaranteed to have the end delimiter matching
-- 
cgit v1.2.3


From d842057c4a205084fb3036122c7426963f04e826 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:55 +0100
Subject: docs: kdoc_parser: move transform lists to a separate file

Over the time, most of the changes at kernel-doc are related
to maintaining a list of transforms to convert macros into pure
C code.

Place such transforms on a separate module, to cleanup the
parser module.

There is an advantage on that: QEMU also uses our own kernel-doc,
but the xforms list there is different. By placing it on a
separate module, we can minimize the differences and make it
easier to keep QEMU in sync with Kernel upstream.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <ccd74b7589e1fff340a74bf8ed16a974532cb54f.1772469446.git.mchehab+huawei@kernel.org>
---
 Documentation/tools/kdoc_parser.rst   |   8 ++
 tools/lib/python/kdoc/kdoc_files.py   |   3 +-
 tools/lib/python/kdoc/kdoc_parser.py  | 145 ++------------------------------
 tools/lib/python/kdoc/xforms_lists.py | 153 ++++++++++++++++++++++++++++++++++
 4 files changed, 168 insertions(+), 141 deletions(-)
 create mode 100644 tools/lib/python/kdoc/xforms_lists.py

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/Documentation/tools/kdoc_parser.rst b/Documentation/tools/kdoc_parser.rst
index 03ee54a1b1cc..55b202173195 100644
--- a/Documentation/tools/kdoc_parser.rst
+++ b/Documentation/tools/kdoc_parser.rst
@@ -4,6 +4,14 @@
 Kernel-doc parser stage
 =======================
 
+C replacement rules used by the parser
+======================================
+
+.. automodule:: lib.python.kdoc.xforms_lists
+   :members:
+   :show-inheritance:
+   :undoc-members:
+
 File handler classes
 ====================
 
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 022487ea2cc6..33618c6abec2 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -15,6 +15,7 @@ import os
 import re
 
 from kdoc.kdoc_parser import KernelDoc
+from kdoc.xforms_lists import CTransforms
 from kdoc.kdoc_output import OutputFormat
 
 
@@ -117,7 +118,7 @@ class KernelFiles():
         if fname in self.files:
             return
 
-        doc = KernelDoc(self.config, fname)
+        doc = KernelDoc(self.config, fname, CTransforms())
         export_table, entries = doc.parse_kdoc()
 
         self.export_table[fname] = export_table
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 917e4528bfbf..d7daf658e9d2 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -69,89 +69,6 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '
                         r'(?:[-:].*)?$',		# description (not captured)
                         cache = False)
 
-#
-# Here begins a long set of transformations to turn structure member prefixes
-# and macro invocations into something we can parse and generate kdoc for.
-#
-struct_args_pattern = r'([^,)]+)'
-
-struct_xforms = [
-    # Strip attributes
-    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
-    (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
-    (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
-    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
-    (KernRe(r'\s*__packed\s*', re.S), ' '),
-    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
-    (KernRe(r'\s*__private', re.S), ' '),
-    (KernRe(r'\s*__rcu', re.S), ' '),
-    (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
-    (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
-    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
-    #
-    # Unwrap struct_group macros based on this definition:
-    # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
-    # which has variants like: struct_group(NAME, MEMBERS...)
-    # Only MEMBERS arguments require documentation.
-    #
-    # Parsing them happens on two steps:
-    #
-    # 1. drop struct group arguments that aren't at MEMBERS,
-    #    storing them as STRUCT_GROUP(MEMBERS)
-    #
-    # 2. remove STRUCT_GROUP() ancillary macro.
-    #
-    # The original logic used to remove STRUCT_GROUP() using an
-    # advanced regex:
-    #
-    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
-    #
-    # with two patterns that are incompatible with
-    # Python re module, as it has:
-    #
-    #   - a recursive pattern: (?1)
-    #   - an atomic grouping: (?>...)
-    #
-    # I tried a simpler version: but it didn't work either:
-    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
-    #
-    # As it doesn't properly match the end parenthesis on some cases.
-    #
-    # So, a better solution was crafted: there's now a NestedMatch
-    # class that ensures that delimiters after a search are properly
-    # matched. So, the implementation to drop STRUCT_GROUP() will be
-    # handled in separate.
-    #
-    (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
-    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
-    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
-    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
-    #
-    # Replace macros
-    #
-    # TODO: use NestedMatch for FOO($1, $2, ...) matches
-    #
-    # it is better to also move those to the NestedMatch logic,
-    # to ensure that parentheses will be properly matched.
-    #
-    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
-     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
-    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
-     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
-    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
-            re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
-    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
-            re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
-    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
-            r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
-            struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
-            struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
-    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
-    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
-    (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
-]
 #
 # Regexes here are guaranteed to have the end delimiter matching
 # the start delimiter. Yet, right now, only one replace group
@@ -161,62 +78,10 @@ struct_nested_prefixes = [
     (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
 ]
 
-#
-# Transforms for function prototypes
-#
-function_xforms = [
-    (KernRe(r"^static +"), ""),
-    (KernRe(r"^extern +"), ""),
-    (KernRe(r"^asmlinkage +"), ""),
-    (KernRe(r"^inline +"), ""),
-    (KernRe(r"^__inline__ +"), ""),
-    (KernRe(r"^__inline +"), ""),
-    (KernRe(r"^__always_inline +"), ""),
-    (KernRe(r"^noinline +"), ""),
-    (KernRe(r"^__FORTIFY_INLINE +"), ""),
-    (KernRe(r"__init +"), ""),
-    (KernRe(r"__init_or_module +"), ""),
-    (KernRe(r"__exit +"), ""),
-    (KernRe(r"__deprecated +"), ""),
-    (KernRe(r"__flatten +"), ""),
-    (KernRe(r"__meminit +"), ""),
-    (KernRe(r"__must_check +"), ""),
-    (KernRe(r"__weak +"), ""),
-    (KernRe(r"__sched +"), ""),
-    (KernRe(r"_noprof"), ""),
-    (KernRe(r"__always_unused *"), ""),
-    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
-    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
-    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
-    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
-    (KernRe(r"__attribute_const__ +"), ""),
-    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
-]
-
-#
-# Transforms for variable prototypes
-#
-var_xforms = [
-    (KernRe(r"__read_mostly"), ""),
-    (KernRe(r"__ro_after_init"), ""),
-    (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
-    (KernRe(r"(?://.*)$"), ""),
-    (KernRe(r"(?:/\*.*\*/)"), ""),
-    (KernRe(r";$"), ""),
-]
-
 #
 # Ancillary functions
 #
 
-def apply_transforms(xforms, text):
-    """
-    Apply a set of transforms to a block of text.
-    """
-    for search, subst in xforms:
-        text = search.sub(subst, text)
-    return text
-
 multi_space = KernRe(r'\s\s+')
 def trim_whitespace(s):
     """
@@ -395,11 +260,12 @@ class KernelDoc:
     #: String to write when a parameter is not described.
     undescribed = "-- undescribed --"
 
-    def __init__(self, config, fname):
+    def __init__(self, config, fname, xforms):
         """Initialize internal variables"""
 
         self.fname = fname
         self.config = config
+        self.xforms = xforms
 
         # Initial state for the state machines
         self.state = state.NORMAL
@@ -883,7 +749,7 @@ class KernelDoc:
         # Go through the list of members applying all of our transformations.
         #
         members = trim_private_members(members)
-        members = apply_transforms(struct_xforms, members)
+        members = self.xforms.apply("struct", members)
 
         nested = NestedMatch()
         for search, sub in struct_nested_prefixes:
@@ -1009,8 +875,7 @@ class KernelDoc:
         # Drop comments and macros to have a pure C prototype
         #
         if not declaration_name:
-            for r, sub in var_xforms:
-                proto = r.sub(sub, proto)
+            proto = self.xforms.apply("var", proto)
 
         proto = proto.rstrip()
 
@@ -1091,7 +956,7 @@ class KernelDoc:
             #
             # Apply the initial transformations.
             #
-            prototype = apply_transforms(function_xforms, prototype)
+            prototype = self.xforms.apply("func", prototype)
 
         # Yes, this truly is vile.  We are looking for:
         # 1. Return type (may be nothing if we're looking at a macro)
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
new file mode 100644
index 000000000000..e6e0302e5dd0
--- /dev/null
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+import re
+
+from kdoc.kdoc_re import KernRe
+
+struct_args_pattern = r'([^,)]+)'
+
+class CTransforms:
+    """
+    Data class containing a long set of transformations to turn
+    structure member prefixes, and macro invocations and variables
+    into something we can parse and generate kdoc for.
+    """
+
+    #: Transforms for structs and unions.
+    struct_xforms = [
+        # Strip attributes
+        (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
+        (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
+        (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
+        (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
+        (KernRe(r'\s*__packed\s*', re.S), ' '),
+        (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
+        (KernRe(r'\s*__private', re.S), ' '),
+        (KernRe(r'\s*__rcu', re.S), ' '),
+        (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
+        (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
+        (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
+        #
+        # Unwrap struct_group macros based on this definition:
+        # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
+        # which has variants like: struct_group(NAME, MEMBERS...)
+        # Only MEMBERS arguments require documentation.
+        #
+        # Parsing them happens on two steps:
+        #
+        # 1. drop struct group arguments that aren't at MEMBERS,
+        #    storing them as STRUCT_GROUP(MEMBERS)
+        #
+        # 2. remove STRUCT_GROUP() ancillary macro.
+        #
+        # The original logic used to remove STRUCT_GROUP() using an
+        # advanced regex:
+        #
+        #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
+        #
+        # with two patterns that are incompatible with
+        # Python re module, as it has:
+        #
+        #   - a recursive pattern: (?1)
+        #   - an atomic grouping: (?>...)
+        #
+        # I tried a simpler version: but it didn't work either:
+        #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
+        #
+        # As it doesn't properly match the end parenthesis on some cases.
+        #
+        # So, a better solution was crafted: there's now a NestedMatch
+        # class that ensures that delimiters after a search are properly
+        # matched. So, the implementation to drop STRUCT_GROUP() will be
+        # handled in separate.
+        #
+        (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
+        (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
+        (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
+        (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
+        #
+        # Replace macros
+        #
+        # TODO: use NestedMatch for FOO($1, $2, ...) matches
+        #
+        # it is better to also move those to the NestedMatch logic,
+        # to ensure that parentheses will be properly matched.
+        #
+        (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
+        r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
+        (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
+        r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
+        (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+                re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
+        (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+                re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
+        (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
+                r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+        (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
+                struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+        (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
+                struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
+        (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
+        (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
+        (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
+    ]
+
+    #: Transforms for function prototypes.
+    function_xforms = [
+        (KernRe(r"^static +"), ""),
+        (KernRe(r"^extern +"), ""),
+        (KernRe(r"^asmlinkage +"), ""),
+        (KernRe(r"^inline +"), ""),
+        (KernRe(r"^__inline__ +"), ""),
+        (KernRe(r"^__inline +"), ""),
+        (KernRe(r"^__always_inline +"), ""),
+        (KernRe(r"^noinline +"), ""),
+        (KernRe(r"^__FORTIFY_INLINE +"), ""),
+        (KernRe(r"__init +"), ""),
+        (KernRe(r"__init_or_module +"), ""),
+        (KernRe(r"__exit +"), ""),
+        (KernRe(r"__deprecated +"), ""),
+        (KernRe(r"__flatten +"), ""),
+        (KernRe(r"__meminit +"), ""),
+        (KernRe(r"__must_check +"), ""),
+        (KernRe(r"__weak +"), ""),
+        (KernRe(r"__sched +"), ""),
+        (KernRe(r"_noprof"), ""),
+        (KernRe(r"__always_unused *"), ""),
+        (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
+        (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
+        (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
+        (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
+        (KernRe(r"__attribute_const__ +"), ""),
+        (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
+    ]
+
+    #: Transforms for variable prototypes.
+    var_xforms = [
+        (KernRe(r"__read_mostly"), ""),
+        (KernRe(r"__ro_after_init"), ""),
+        (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
+        (KernRe(r"(?://.*)$"), ""),
+        (KernRe(r"(?:/\*.*\*/)"), ""),
+        (KernRe(r";$"), ""),
+    ]
+
+    #: Transforms main dictionary used at apply_transforms().
+    xforms = {
+        "struct": struct_xforms,
+        "func": function_xforms,
+        "var": var_xforms,
+    }
+
+    def apply(self, xforms_type, text):
+        """
+        Apply a set of transforms to a block of text.
+        """
+        if xforms_type not in self.xforms:
+            return text
+
+        for search, subst in self.xforms[xforms_type]:
+            text = search.sub(subst, text)
+        return text
-- 
cgit v1.2.3


From 4ff59bdd93f0e80b5014977502d082c778f96304 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 2 Mar 2026 17:40:56 +0100
Subject: docs: xforms_lists: ignore context analysis and lock attributes

Drop context analysis and lock (tracking) attributes to avoid
kernel-doc warnings.

There are now lots of warnings like these:

    Documentation/core-api/kref:328: ../include/linux/kref.h:72: WARNING: Invalid C declaration: Expected end of definition. [error at 96]
      int kref_put_mutex (struct kref *kref, void (*release)(struct kref *kref), struct mutex *mutex) __cond_acquires(true# mutex)
      ------------------------------------------------------------------------------------------------^
    Documentation/core-api/kref:328: ../include/linux/kref.h:94: WARNING: Invalid C declaration: Expected end of definition. [error at 92]
      int kref_put_lock (struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) __cond_acquires(true# lock)
      --------------------------------------------------------------------------------------------^

The regex is suggested by Mauro; mine was too greedy. Thanks.
Updated context analysis and lock macros list provided by PeterZ. Thanks.

[mchehab: modified to be applied after xforms_lists split]

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/all/20260107161548.45530e1c@canb.auug.org.au/
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <3c7fdfc364a8920f92530b47bdbf4bb29a40371f.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py  | 10 ++++++++++
 tools/lib/python/kdoc/xforms_lists.py |  5 +++++
 2 files changed, 15 insertions(+)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index d7daf658e9d2..503a18212747 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -75,6 +75,16 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '
 # is allowed.
 #
 struct_nested_prefixes = [
+    (re.compile(r"__cond_acquires\s*\("), ""),
+    (re.compile(r"__cond_releases\s*\("), ""),
+    (re.compile(r"__acquires\s*\("), ""),
+    (re.compile(r"__releases\s*\("), ""),
+    (re.compile(r"__must_hold\s*\("), ""),
+    (re.compile(r"__must_not_hold\s*\("), ""),
+    (re.compile(r"__must_hold_shared\s*\("), ""),
+    (re.compile(r"__cond_acquires_shared\s*\("), ""),
+    (re.compile(r"__acquires_shared\s*\("), ""),
+    (re.compile(r"__releases_shared\s*\("), ""),
     (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
 ]
 
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index e6e0302e5dd0..1bda7c4634c3 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -22,6 +22,8 @@ class CTransforms:
         (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
         (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
         (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
+        (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '),
+        (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '),
         (KernRe(r'\s*__packed\s*', re.S), ' '),
         (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
         (KernRe(r'\s*__private', re.S), ' '),
@@ -120,6 +122,7 @@ class CTransforms:
         (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
         (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
         (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
+        (KernRe(r"__no_context_analysis\s*"), ""),
         (KernRe(r"__attribute_const__ +"), ""),
         (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
     ]
@@ -128,6 +131,8 @@ class CTransforms:
     var_xforms = [
         (KernRe(r"__read_mostly"), ""),
         (KernRe(r"__ro_after_init"), ""),
+        (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""),
+        (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""),
         (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
         (KernRe(r"(?://.*)$"), ""),
         (KernRe(r"(?:/\*.*\*/)"), ""),
-- 
cgit v1.2.3


From 34503b5fd10d8c7f1b1f4fecb6aae826fcf79424 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:59 +0100
Subject: docs: kdoc_re: Change NestedMath args replacement to \0

Future patches will allow parsing each argument instead of the
hole set. Prepare for it by changing the replace all args from
\1 to \0.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <46e383118be9d9e432e3814fe819ebb12261d7b4.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 2 +-
 tools/lib/python/kdoc/kdoc_re.py     | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 503a18212747..0f90c16cb51a 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -85,7 +85,7 @@ struct_nested_prefixes = [
     (re.compile(r"__cond_acquires_shared\s*\("), ""),
     (re.compile(r"__acquires_shared\s*\("), ""),
     (re.compile(r"__releases_shared\s*\("), ""),
-    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
+    (re.compile(r'\bSTRUCT_GROUP\('), r'\0'),
 ]
 
 #
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 00afa5bccd6d..ea4f6f3d9e42 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -188,7 +188,7 @@ class NestedMatch:
     # except that the content inside the match group is delimiter-aligned.
     #
     # The content inside parentheses is converted into a single replace
-    # group (e.g. r`\1').
+    # group (e.g. r`\0').
     #
     # It would be nice to change such definition to support multiple
     # match groups, allowing a regex equivalent to:
@@ -291,7 +291,7 @@ class NestedMatch:
 
         if the sub argument contains::
 
-            r'\1'
+            r'\0'
 
         it will work just like re: it places there the matched paired data
         with the delimiter stripped.
@@ -310,9 +310,9 @@ class NestedMatch:
             # Value, ignoring start/end delimiters
             value = line[end:pos - 1]
 
-            # replaces \1 at the sub string, if \1 is used there
+            # replaces \0 at the sub string, if \0 is used there
             new_sub = sub
-            new_sub = new_sub.replace(r'\1', value)
+            new_sub = new_sub.replace(r'\0', value)
 
             out += new_sub
 
-- 
cgit v1.2.3


From fc44c0a0b2a72f2e9331063a311a548634ae18af Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:41:00 +0100
Subject: docs: kdoc_re: make NestedMatch use KernRe

Instead of using re_compile, let's create the class with the
regex and use KernRe to keep it cached.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <cdf900faf0ed8a08f8c6ac1db5a43342968c0739.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 25 ++++++++++++-------------
 tools/lib/python/kdoc/kdoc_re.py     | 24 +++++++++++++++++-------
 2 files changed, 29 insertions(+), 20 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 0f90c16cb51a..cd9857906a2b 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -75,17 +75,17 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '
 # is allowed.
 #
 struct_nested_prefixes = [
-    (re.compile(r"__cond_acquires\s*\("), ""),
-    (re.compile(r"__cond_releases\s*\("), ""),
-    (re.compile(r"__acquires\s*\("), ""),
-    (re.compile(r"__releases\s*\("), ""),
-    (re.compile(r"__must_hold\s*\("), ""),
-    (re.compile(r"__must_not_hold\s*\("), ""),
-    (re.compile(r"__must_hold_shared\s*\("), ""),
-    (re.compile(r"__cond_acquires_shared\s*\("), ""),
-    (re.compile(r"__acquires_shared\s*\("), ""),
-    (re.compile(r"__releases_shared\s*\("), ""),
-    (re.compile(r'\bSTRUCT_GROUP\('), r'\0'),
+    (NestedMatch(r"__cond_acquires\s*\("), ""),
+    (NestedMatch(r"__cond_releases\s*\("), ""),
+    (NestedMatch(r"__acquires\s*\("), ""),
+    (NestedMatch(r"__releases\s*\("), ""),
+    (NestedMatch(r"__must_hold\s*\("), ""),
+    (NestedMatch(r"__must_not_hold\s*\("), ""),
+    (NestedMatch(r"__must_hold_shared\s*\("), ""),
+    (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
+    (NestedMatch(r"__acquires_shared\s*\("), ""),
+    (NestedMatch(r"__releases_shared\s*\("), ""),
+    (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
 ]
 
 #
@@ -761,9 +761,8 @@ class KernelDoc:
         members = trim_private_members(members)
         members = self.xforms.apply("struct", members)
 
-        nested = NestedMatch()
         for search, sub in struct_nested_prefixes:
-            members = nested.sub(search, sub, members)
+            members = search.sub(search, sub, members)
         #
         # Deal with embedded struct and union members, and drop enums entirely.
         #
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index ea4f6f3d9e42..085b89a4547c 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -200,7 +200,10 @@ class NestedMatch:
     #
     #   FOO(arg1, arg2, arg3)
 
-    def _search(self, regex, line):
+    def __init__(self, regex):
+        self.regex = KernRe(regex)
+
+    def _search(self, line):
         """
         Finds paired blocks for a regex that ends with a delimiter.
 
@@ -222,7 +225,7 @@ class NestedMatch:
 
         stack = []
 
-        for match_re in regex.finditer(line):
+        for match_re in self.regex.finditer(line):
             start = match_re.start()
             offset = match_re.end()
             string_char = None
@@ -270,7 +273,7 @@ class NestedMatch:
                         yield start, offset, pos + 1
                         break
 
-    def search(self, regex, line):
+    def search(self, line):
         """
         This is similar to re.search:
 
@@ -278,12 +281,12 @@ class NestedMatch:
         returning occurrences only if all delimiters are paired.
         """
 
-        for t in self._search(regex, line):
+        for t in self._search(line):
 
             yield line[t[0]:t[2]]
 
-    def sub(self, regex, sub, line, count=0):
-        r"""
+    def sub(self, sub, line, count=0):
+        """
         This is similar to re.sub:
 
         It matches a regex that it is followed by a delimiter,
@@ -304,7 +307,7 @@ class NestedMatch:
         cur_pos = 0
         n = 0
 
-        for start, end, pos in self._search(regex, line):
+        for start, end, pos in self._search(line):
             out += line[cur_pos:start]
 
             # Value, ignoring start/end delimiters
@@ -331,3 +334,10 @@ class NestedMatch:
         out += line[cur_pos:l]
 
         return out
+
+    def __repr__(self):
+        """
+        Returns a displayable version of the class init.
+        """
+
+        return f'NestedMatch("{self.regex.regex.pattern}")'
-- 
cgit v1.2.3


From 85c2a51357f720fabfb6fa8d2551d87a94e797cb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:41:01 +0100
Subject: docs: kdoc_parser: move nested match transforms to xforms_lists.py

As NestedMatch now has a sub method and a declaration close to
what KernRe does, we can move the rules to xforms_lists and
simplify kdoc_parser a little bit.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <762ce2a58ff024c1b0b6f6a6e05020d1415b8308.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py  | 21 ---------------------
 tools/lib/python/kdoc/xforms_lists.py | 14 +++++++++++++-
 2 files changed, 13 insertions(+), 22 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index cd9857906a2b..edf70ba139a5 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -69,25 +69,6 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '
                         r'(?:[-:].*)?$',		# description (not captured)
                         cache = False)
 
-#
-# Regexes here are guaranteed to have the end delimiter matching
-# the start delimiter. Yet, right now, only one replace group
-# is allowed.
-#
-struct_nested_prefixes = [
-    (NestedMatch(r"__cond_acquires\s*\("), ""),
-    (NestedMatch(r"__cond_releases\s*\("), ""),
-    (NestedMatch(r"__acquires\s*\("), ""),
-    (NestedMatch(r"__releases\s*\("), ""),
-    (NestedMatch(r"__must_hold\s*\("), ""),
-    (NestedMatch(r"__must_not_hold\s*\("), ""),
-    (NestedMatch(r"__must_hold_shared\s*\("), ""),
-    (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
-    (NestedMatch(r"__acquires_shared\s*\("), ""),
-    (NestedMatch(r"__releases_shared\s*\("), ""),
-    (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
-]
-
 #
 # Ancillary functions
 #
@@ -761,8 +742,6 @@ class KernelDoc:
         members = trim_private_members(members)
         members = self.xforms.apply("struct", members)
 
-        for search, sub in struct_nested_prefixes:
-            members = search.sub(search, sub, members)
         #
         # Deal with embedded struct and union members, and drop enums entirely.
         #
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index 1bda7c4634c3..c07cbe1e6349 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -4,7 +4,7 @@
 
 import re
 
-from kdoc.kdoc_re import KernRe
+from kdoc.kdoc_re import KernRe, NestedMatch
 
 struct_args_pattern = r'([^,)]+)'
 
@@ -94,6 +94,18 @@ class CTransforms:
         (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
         (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
         (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
+
+        (NestedMatch(r"__cond_acquires\s*\("), ""),
+        (NestedMatch(r"__cond_releases\s*\("), ""),
+        (NestedMatch(r"__acquires\s*\("), ""),
+        (NestedMatch(r"__releases\s*\("), ""),
+        (NestedMatch(r"__must_hold\s*\("), ""),
+        (NestedMatch(r"__must_not_hold\s*\("), ""),
+        (NestedMatch(r"__must_hold_shared\s*\("), ""),
+        (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
+        (NestedMatch(r"__acquires_shared\s*\("), ""),
+        (NestedMatch(r"__releases_shared\s*\("), ""),
+        (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
     ]
 
     #: Transforms for function prototypes.
-- 
cgit v1.2.3


From b1e64e30fce86e61d3b09f9352b262622f3f0cda Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:23 +0100
Subject: docs: kdoc: don't add broken comments inside prototypes

Parsing a file like drivers/scsi/isci/host.h, which contains
broken kernel-doc markups makes it create a prototype that contains
unmatched end comments.

That causes, for instance, struct sci_power_control to be shown this
this prototype:

    struct sci_power_control {
        * it is not. */ bool timer_started;
        */ struct sci_timer timer;
        * requesters field. */ u8 phys_waiting;
        */ u8 phys_granted_power;
        * mapped into requesters via struct sci_phy.phy_index */ struct isci_phy *requesters[SCI_MAX_PHYS];
    };

as comments won't start with "/*" anymore.

Fix the logic to detect such cases, and keep adding the comments
inside it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <18e577dbbd538dcc22945ff139fe3638344e14f0.1773074166.git.mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <12ac4a97e2bd5a19d6537122c10098690c38d2c7.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index edf70ba139a5..086579d00b5c 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -1355,6 +1355,12 @@ class KernelDoc:
         elif doc_content.search(line):
             self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
             self.state = state.PROTO
+
+            #
+            # Don't let it add partial comments at the code, as breaks the
+            # logic meant to remove comments from prototypes.
+            #
+            self.process_proto_type(ln, "/**\n" + line)
         # else ... ??
 
     def process_inline_text(self, ln, line):
-- 
cgit v1.2.3


From d5265f7af2d284d5421b763f268157b5fa72f806 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:24 +0100
Subject: docs: kdoc: properly handle empty enum arguments

Depending on how the enum proto is written, a comma at the end
may incorrectly make kernel-doc parse an arg like " ".

Strip spaces before checking if arg is empty.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <4182bfb7e5f5b4bbaf05cee1bede691e56247eaf.1773074166.git.mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <640784283d52c5fc52ea597344ecd567e2fb6e22.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 086579d00b5c..4b3c555e6c8e 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -810,9 +810,10 @@ class KernelDoc:
         member_set = set()
         members = KernRe(r'\([^;)]*\)').sub('', members)
         for arg in members.split(','):
-            if not arg:
-                continue
             arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+            if not arg.strip():
+                continue
+
             self.entry.parameterlist.append(arg)
             if arg not in self.entry.parameterdescs:
                 self.entry.parameterdescs[arg] = self.undescribed
-- 
cgit v1.2.3


From cd77a9aa20ef53a03e5bb2630a5e7b16b910f198 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:26 +0100
Subject: docs: kdoc: use tokenizer to handle comments on structs

Better handle comments inside structs. After those changes,
all unittests now pass:

  test_private:
    TestPublicPrivate:
        test balanced_inner_private:                                 OK
        test balanced_non_greddy_private:                            OK
        test balanced_private:                                       OK
        test no private:                                             OK
        test unbalanced_inner_private:                               OK
        test unbalanced_private:                                     OK
        test unbalanced_struct_group_tagged_with_private:            OK
        test unbalanced_two_struct_group_tagged_first_with_private:  OK
        test unbalanced_without_end_of_line:                         OK

  Ran 9 tests

This also solves a bug when handling STRUCT_GROUP() with a private
comment on it:

	@@ -397134,7 +397134,7 @@ basic V4L2 device-level support.
	             unsigned int    max_len;
	             unsigned int    offset;
	             struct page_pool_params_slow  slow;
	-            STRUCT_GROUP( struct net_device *netdev;
	+            struct net_device *netdev;
	             unsigned int queue_idx;
	             unsigned int    flags;
	       };

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <f83ee9e8c38407eaab6ad10d4ccf155fb36683cc.1773074166.git.mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <054763260f7b5459ad0738ed906d7c358d640692.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 4b3c555e6c8e..62d8030cf532 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,6 +13,7 @@ import sys
 import re
 from pprint import pformat
 
+from kdoc.c_lex import CTokenizer
 from kdoc.kdoc_re import NestedMatch, KernRe
 from kdoc.kdoc_item import KdocItem
 
@@ -84,15 +85,9 @@ def trim_private_members(text):
     """
     Remove ``struct``/``enum`` members that have been marked "private".
     """
-    # First look for a "public:" block that ends a private region, then
-    # handle the "private until the end" case.
-    #
-    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
-    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
-    #
-    # We needed the comments to do the above, but now we can take them out.
-    #
-    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
+
+    tokens = CTokenizer(text)
+    return str(tokens)
 
 class state:
     """
-- 
cgit v1.2.3


From 600079fdcf46fafe15b4ccd62804d66e05309cc6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:34 +0100
Subject: docs: kdoc: replace NestedMatch with CMatch

Our previous approach to solve nested structs were to use
NestedMatch. It works well, but adding support to parse delimiters
is very complex.

Instead, use CMatch, which uses a C tokenizer, making the code more
reliable and simpler.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <900bff66f8093402999f9fe055fbfa3fa33a8d8b.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py  |  2 +-
 tools/lib/python/kdoc/xforms_lists.py | 31 ++++++++++++++++---------------
 2 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 62d8030cf532..efd58c88ff31 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -14,7 +14,7 @@ import re
 from pprint import pformat
 
 from kdoc.c_lex import CTokenizer
-from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.kdoc_re import KernRe
 from kdoc.kdoc_item import KdocItem
 
 #
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index c07cbe1e6349..7fa7f52cec7b 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -4,7 +4,8 @@
 
 import re
 
-from kdoc.kdoc_re import KernRe, NestedMatch
+from kdoc.kdoc_re import KernRe
+from kdoc.c_lex import CMatch
 
 struct_args_pattern = r'([^,)]+)'
 
@@ -60,7 +61,7 @@ class CTransforms:
         #
         # As it doesn't properly match the end parenthesis on some cases.
         #
-        # So, a better solution was crafted: there's now a NestedMatch
+        # So, a better solution was crafted: there's now a CMatch
         # class that ensures that delimiters after a search are properly
         # matched. So, the implementation to drop STRUCT_GROUP() will be
         # handled in separate.
@@ -72,9 +73,9 @@ class CTransforms:
         #
         # Replace macros
         #
-        # TODO: use NestedMatch for FOO($1, $2, ...) matches
+        # TODO: use CMatch for FOO($1, $2, ...) matches
         #
-        # it is better to also move those to the NestedMatch logic,
+        # it is better to also move those to the CMatch logic,
         # to ensure that parentheses will be properly matched.
         #
         (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
@@ -95,17 +96,17 @@ class CTransforms:
         (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
         (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
 
-        (NestedMatch(r"__cond_acquires\s*\("), ""),
-        (NestedMatch(r"__cond_releases\s*\("), ""),
-        (NestedMatch(r"__acquires\s*\("), ""),
-        (NestedMatch(r"__releases\s*\("), ""),
-        (NestedMatch(r"__must_hold\s*\("), ""),
-        (NestedMatch(r"__must_not_hold\s*\("), ""),
-        (NestedMatch(r"__must_hold_shared\s*\("), ""),
-        (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
-        (NestedMatch(r"__acquires_shared\s*\("), ""),
-        (NestedMatch(r"__releases_shared\s*\("), ""),
-        (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
+        (CMatch(r"__cond_acquires"), ""),
+        (CMatch(r"__cond_releases"), ""),
+        (CMatch(r"__acquires"), ""),
+        (CMatch(r"__releases"), ""),
+        (CMatch(r"__must_hold"), ""),
+        (CMatch(r"__must_not_hold"), ""),
+        (CMatch(r"__must_hold_shared"), ""),
+        (CMatch(r"__cond_acquires_shared"), ""),
+        (CMatch(r"__acquires_shared"), ""),
+        (CMatch(r"__releases_shared"), ""),
+        (CMatch(r"STRUCT_GROUP"), r'\0'),
     ]
 
     #: Transforms for function prototypes.
-- 
cgit v1.2.3


From 024e200e2a89d71dceff7d1bff8ae77b145726e0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:38 +0100
Subject: docs: c_lex: setup a logger to report tokenizer issues

Report file that has issues detected via CMatch and CTokenizer.

This is done by setting up a logger that will be overriden by
kdoc_parser, when used on it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <903ad83ae176196a50444e66177a4f5bcdef5199.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py       | 16 ++++++++++++++++
 tools/lib/python/kdoc/kdoc_parser.py |  4 +++-
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index 20e50ff0ecd5..b6d58bd470a9 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -22,6 +22,22 @@ from .kdoc_re import KernRe
 
 log = logging.getLogger(__name__)
 
+def tokenizer_set_log(logger, prefix = ""):
+    """
+    Replace the module‑level logger with a LoggerAdapter that
+    prepends *prefix* to every message.
+    """
+    global log
+
+    class PrefixAdapter(logging.LoggerAdapter):
+        """
+        Ancillary class to set prefix on all message logs.
+        """
+        def process(self, msg, kwargs):
+            return f"{prefix}{msg}", kwargs
+
+    # Wrap the provided logger in our adapter
+    log = PrefixAdapter(logger, {"prefix": prefix})
 
 class CToken():
     """
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index efd58c88ff31..f90c6dd0343d 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,7 @@ import sys
 import re
 from pprint import pformat
 
-from kdoc.c_lex import CTokenizer
+from kdoc.c_lex import CTokenizer, tokenizer_set_log
 from kdoc.kdoc_re import KernRe
 from kdoc.kdoc_item import KdocItem
 
@@ -253,6 +253,8 @@ class KernelDoc:
         self.config = config
         self.xforms = xforms
 
+        tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
+
         # Initial state for the state machines
         self.state = state.NORMAL
 
-- 
cgit v1.2.3


From 12aa7753ff4c5fea405d139bcf67f49bda2c932e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:40 +0100
Subject: docs: kdoc: ensure that comments are dropped before calling
 split_struct_proto()

Changeset 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes")
revealed a hidden bug at split_struct_proto(): some comments there may break
its capability of properly identifying a struct.

Fixing it is as simple as stripping comments before calling it.

Fixes: 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <dcff37b6da5329aea415de31f543b6a1c2cbbbce.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index f90c6dd0343d..8b2c9d0f0c58 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -723,6 +723,7 @@ class KernelDoc:
         #
         # Do the basic parse to get the pieces of the declaration.
         #
+        proto = trim_private_members(proto)
         struct_parts = self.split_struct_proto(proto)
         if not struct_parts:
             self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
@@ -763,6 +764,7 @@ class KernelDoc:
         # Strip preprocessor directives.  Note that this depends on the
         # trailing semicolon we added in process_proto_type().
         #
+        proto = trim_private_members(proto)
         proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
         #
         # Parse out the name and members of the enum.  Typedef form first.
@@ -770,7 +772,7 @@ class KernelDoc:
         r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
         if r.search(proto):
             declaration_name = r.group(2)
-            members = trim_private_members(r.group(1))
+            members = r.group(1)
         #
         # Failing that, look for a straight enum
         #
@@ -778,7 +780,7 @@ class KernelDoc:
             r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
             if r.match(proto):
                 declaration_name = r.group(1)
-                members = trim_private_members(r.group(2))
+                members = r.group(2)
         #
         # OK, this isn't going to work.
         #
-- 
cgit v1.2.3


From 79d881beb721d27f679f0dc1cba2d5fe2d7f6d8d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:41 +0100
Subject: docs: kdoc_parser: avoid tokenizing structs everytime

Most of the rules inside CTransforms are of the type CMatch.

Don't re-parse the source code every time.

Doing this doesn't change the output, but makes kdoc almost
as fast as before the tokenizer patches:

    # Before tokenizer patches
    $ time ./scripts/kernel-doc . -man >original 2>&1

    real    0m42.933s
    user    0m36.523s
    sys     0m1.145s

    # After tokenizer patches
    $ time ./scripts/kernel-doc . -man >before 2>&1

    real    1m29.853s
    user    1m23.974s
    sys     0m1.237s

    # After this patch
    $ time ./scripts/kernel-doc . -man >after 2>&1

    real    0m48.579s
    user    0m45.938s
    sys     0m0.988s

    $ diff -s before after
    Files before and after are identical

Manually checked the differences between original and after
with:

    $ diff -U0 -prBw original after|grep -v Warning|grep -v "@@"|less

They're due:
  - whitespace fixes;
  - struct_group are now better handled;
  - several badly-generated man pages from broken inline kernel-doc
    markups are now fixed.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <1cc2a4286ebf7d4b2d03fcaf42a1ba9fa09004b9.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py  |  1 -
 tools/lib/python/kdoc/xforms_lists.py | 30 ++++++++++++++++++++++++------
 2 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 8b2c9d0f0c58..f6c4ee3b18c9 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -737,7 +737,6 @@ class KernelDoc:
         #
         # Go through the list of members applying all of our transformations.
         #
-        members = trim_private_members(members)
         members = self.xforms.apply("struct", members)
 
         #
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index 2056572852fd..5a62d4a450cb 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -5,7 +5,7 @@
 import re
 
 from kdoc.kdoc_re import KernRe
-from kdoc.c_lex import CMatch
+from kdoc.c_lex import CMatch, CTokenizer
 
 struct_args_pattern = r'([^,)]+)'
 
@@ -16,6 +16,12 @@ class CTransforms:
     into something we can parse and generate kdoc for.
     """
 
+    #
+    # NOTE:
+    #      Due to performance reasons, place CMatch rules before KernRe,
+    #      as this avoids running the C parser every time.
+    #
+
     #: Transforms for structs and unions.
     struct_xforms = [
         # Strip attributes
@@ -124,13 +130,25 @@ class CTransforms:
         "var": var_xforms,
     }
 
-    def apply(self, xforms_type, text):
+    def apply(self, xforms_type, source):
         """
-        Apply a set of transforms to a block of text.
+        Apply a set of transforms to a block of source.
+
+        As tokenizer is used here, this function also remove comments
+        at the end.
         """
         if xforms_type not in self.xforms:
-            return text
+            return source
+
+        if isinstance(source, str):
+            source = CTokenizer(source)
 
         for search, subst in self.xforms[xforms_type]:
-            text = search.sub(subst, text)
-        return text
+            #
+            # KernRe only accept strings.
+            #
+            if isinstance(search, KernRe):
+                source = str(source)
+
+            source = search.sub(subst, source)
+        return str(source)
-- 
cgit v1.2.3


From e394855fcc897f73f23c364a3a596b54cc879e4c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:08 +0100
Subject: docs: kdoc_item: fix a typo on sections_start_lines

Currently, there are 15 occurrences of section?_start_lines,
with 10 using the plural way.

This is an issue, as, while kdoc_output works with KdocItem,
the term doesn't match its init value.

The variable sections_start_lines stores multiple sections,
so placing it in plural is its correct way.

So, ensure that, on all parts of kdoc, this will be referred
as sections_start_lines.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <d1e0f1d3f80df41c11a1bbde6a12fd9468bc3813.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_item.py   | 2 +-
 tools/lib/python/kdoc/kdoc_output.py | 2 +-
 tools/lib/python/kdoc/kdoc_parser.py | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
index 5f41790efacb..fe08cac861c2 100644
--- a/tools/lib/python/kdoc/kdoc_item.py
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -82,7 +82,7 @@ class KdocItem:
         Set sections and start lines.
         """
         self.sections = sections
-        self.section_start_lines = start_lines
+        self.sections_start_lines = start_lines
 
     def set_params(self, names, descs, types, starts):
         """
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 73d71cbeabb5..1b54117dbe19 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -389,7 +389,7 @@ class RestFormat(OutputFormat):
             else:
                 self.data += f'{self.lineprefix}**{section}**\n\n'
 
-            self.print_lineno(args.section_start_lines.get(section, 0))
+            self.print_lineno(args.sections_start_lines.get(section, 0))
             self.output_highlight(text)
             self.data += "\n"
         self.data += "\n"
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index f6c4ee3b18c9..35658a7e72d5 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -140,7 +140,7 @@ class KernelEntry:
         self.parametertypes = {}
         self.parameterdesc_start_lines = {}
 
-        self.section_start_lines = {}
+        self.sections_start_lines = {}
         self.sections = {}
 
         self.anon_struct_union = False
@@ -220,7 +220,7 @@ class KernelEntry:
                 self.sections[name] += '\n' + contents
             else:
                 self.sections[name] = contents
-                self.section_start_lines[name] = self.new_start_line
+                self.sections_start_lines[name] = self.new_start_line
                 self.new_start_line = 0
 
 #        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
@@ -316,7 +316,7 @@ class KernelDoc:
         for section in ["Description", "Return"]:
             if section in sections and not sections[section].rstrip():
                 del sections[section]
-        item.set_sections(sections, self.entry.section_start_lines)
+        item.set_sections(sections, self.entry.sections_start_lines)
         item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                         self.entry.parametertypes,
                         self.entry.parameterdesc_start_lines)
-- 
cgit v1.2.3


From 01d6d7bf9672f1aeabbffaa3fbfb8017223ff878 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:14 +0100
Subject: docs: kernel-doc: add support to store output on a YAML file

Add a command line parameter and library support to optionally
store:
- KdocItem intermediate format after parsing;
- man pages output;
- rst output.

inside a YAML file.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <ba54277b3c909867153b9547dfa33c1831ca35d9.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/docs/kernel-doc                | 48 ++++++++++++++++++++++++++++++------
 tools/lib/python/kdoc/kdoc_files.py  | 47 ++++++++++++++++++++++++++++++-----
 tools/lib/python/kdoc/kdoc_parser.py | 27 +++++++++++++++++++-
 3 files changed, 107 insertions(+), 15 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/docs/kernel-doc b/tools/docs/kernel-doc
index 3a932f95bdf5..d9192c3f1645 100755
--- a/tools/docs/kernel-doc
+++ b/tools/docs/kernel-doc
@@ -240,11 +240,9 @@ def main():
                         help=EXPORT_FILE_DESC)
 
     #
-    # Output format mutually-exclusive group
+    # Output format
     #
-    out_group = parser.add_argument_group("Output format selection (mutually exclusive)")
-
-    out_fmt = out_group.add_mutually_exclusive_group()
+    out_fmt = parser.add_argument_group("Output format selection (mutually exclusive)")
 
     out_fmt.add_argument("-m", "-man", "--man", action="store_true",
                          help="Output troff manual page format.")
@@ -253,6 +251,12 @@ def main():
     out_fmt.add_argument("-N", "-none", "--none", action="store_true",
                          help="Do not output documentation, only warnings.")
 
+    out_fmt.add_argument("-y", "--yaml-file", "--yaml",
+                         help="Stores kernel-doc output on a yaml file.")
+    out_fmt.add_argument("-k", "--kdoc-item", "--kdoc", action="store_true",
+                         help="Store KdocItem inside yaml file. Ued together with --yaml.")
+
+
     #
     # Output selection mutually-exclusive group
     #
@@ -323,14 +327,42 @@ def main():
     from kdoc.kdoc_files import KernelFiles             # pylint: disable=C0415
     from kdoc.kdoc_output import RestFormat, ManFormat  # pylint: disable=C0415
 
-    if args.man:
-        out_style = ManFormat(modulename=args.modulename)
-    elif args.none:
+    yaml_content = set()
+    if args.yaml_file:
         out_style = None
+
+        if args.man:
+            yaml_content |= {"man"}
+
+        if args.rst:
+            yaml_content |= {"rst"}
+
+        if args.kdoc_item or not yaml_content:
+            yaml_content |= {"KdocItem"}
+
     else:
-        out_style = RestFormat()
+        n_outputs = 0
+
+        if args.man:
+            out_style = ManFormat(modulename=args.modulename)
+            n_outputs += 1
+
+        if args.none:
+            out_style = None
+            n_outputs += 1
+
+        if  args.rst or n_outputs == 0:
+            n_outputs += 1
+            out_style = RestFormat()
+
+        if n_outputs > 1:
+             parser.error("Those arguments are muttually exclusive: --man, --rst, --none, except when generating a YAML file.")
+
+        elif not n_outputs:
+            out_style = RestFormat()
 
     kfiles = KernelFiles(verbose=args.verbose,
+                         yaml_file=args.yaml_file, yaml_content=yaml_content,
                          out_style=out_style, werror=args.werror,
                          wreturn=args.wreturn, wshort_desc=args.wshort_desc,
                          wcontents_before_sections=args.wcontents_before_sections)
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 58f4ee08e226..5a299ed44d62 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -16,6 +16,7 @@ import re
 from kdoc.kdoc_parser import KernelDoc
 from kdoc.xforms_lists import CTransforms
 from kdoc.kdoc_output import OutputFormat
+from kdoc.kdoc_yaml_file import KDocTestFile
 
 
 class GlobSourceFiles:
@@ -152,6 +153,12 @@ class KernelFiles():
 
         If not specified, defaults to use: ``logging.getLogger("kernel-doc")``
 
+    ``yaml_file``
+        If defined, stores the output inside a YAML file.
+
+    ``yaml_content``
+        Defines what will be inside the YAML file.
+
     Note:
         There are two type of parsers defined here:
 
@@ -181,7 +188,12 @@ class KernelFiles():
         if fname in self.files:
             return
 
-        doc = KernelDoc(self.config, fname, self.xforms)
+        if self.test_file:
+            store_src = True
+        else:
+            store_src = False
+
+        doc = KernelDoc(self.config, fname, self.xforms, store_src=store_src)
         export_table, entries = doc.parse_kdoc()
 
         self.export_table[fname] = export_table
@@ -191,6 +203,10 @@ class KernelFiles():
 
         self.results[fname] = entries
 
+        source = doc.get_source()
+        if source:
+            self.source[fname] = source
+
     def process_export_file(self, fname):
         """
         Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.
@@ -220,7 +236,7 @@ class KernelFiles():
     def __init__(self, verbose=False, out_style=None, xforms=None,
                  werror=False, wreturn=False, wshort_desc=False,
                  wcontents_before_sections=False,
-                 logger=None):
+                 yaml_file=None, yaml_content=None, logger=None):
         """
         Initialize startup variables and parse all files.
         """
@@ -259,6 +275,11 @@ class KernelFiles():
         # Override log warning, as we want to count errors
         self.config.warning = self.warning
 
+        if yaml_file:
+            self.test_file = KDocTestFile(self.config, yaml_file, yaml_content)
+        else:
+            self.test_file = None
+
         if xforms:
             self.xforms = xforms
         else:
@@ -273,6 +294,7 @@ class KernelFiles():
 
         self.errors = 0
         self.results = {}
+        self.source = {}
 
         self.files = set()
         self.export_files = set()
@@ -331,16 +353,29 @@ class KernelFiles():
                 for s in symbol:
                     function_table.add(s)
 
-            self.out_style.set_filter(export, internal, symbol, nosymbol,
-                                      function_table, enable_lineno,
-                                      no_doc_sections)
-
             if fname not in self.results:
                 self.config.log.warning("No kernel-doc for file %s", fname)
                 continue
 
             symbols = self.results[fname]
 
+            if self.test_file:
+                self.test_file.set_filter(export, internal, symbol, nosymbol,
+                                          function_table, enable_lineno,
+                                          no_doc_sections)
+
+                self.test_file.output_symbols(fname, symbols,
+                                              self.source.get(fname))
+
+                continue
+
+            self.out_style.set_filter(export, internal, symbol, nosymbol,
+                                      function_table, enable_lineno,
+                                      no_doc_sections)
+
             msg = self.out_style.output_symbols(fname, symbols)
             if msg:
                 yield fname, msg
+
+        if self.test_file:
+            self.test_file.write()
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 35658a7e72d5..a10e64589d76 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -246,12 +246,13 @@ class KernelDoc:
     #: String to write when a parameter is not described.
     undescribed = "-- undescribed --"
 
-    def __init__(self, config, fname, xforms):
+    def __init__(self, config, fname, xforms, store_src=False):
         """Initialize internal variables"""
 
         self.fname = fname
         self.config = config
         self.xforms = xforms
+        self.store_src = store_src
 
         tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
 
@@ -264,6 +265,9 @@ class KernelDoc:
         # Place all potential outputs into an array
         self.entries = []
 
+        # When store_src is true, the kernel-doc source content is stored here
+        self.source = None
+
         #
         # We need Python 3.7 for its "dicts remember the insertion
         # order" guarantee
@@ -1592,6 +1596,15 @@ class KernelDoc:
         state.DOCBLOCK:			process_docblock,
         }
 
+    def get_source(self):
+        """
+        Return the file content of the lines handled by kernel-doc at the
+        latest parse_kdoc() run.
+
+        Returns none if KernelDoc() was not initialized with store_src,
+        """
+        return self.source
+
     def parse_kdoc(self):
         """
         Open and process each line of a C source file.
@@ -1605,6 +1618,8 @@ class KernelDoc:
         prev = ""
         prev_ln = None
         export_table = set()
+        self.source = []
+        self.state = state.NORMAL
 
         try:
             with open(self.fname, "r", encoding="utf8",
@@ -1631,6 +1646,8 @@ class KernelDoc:
                                           ln, state.name[self.state],
                                           line)
 
+                    prev_state = self.state
+
                     # This is an optimization over the original script.
                     # There, when export_file was used for the same file,
                     # it was read twice. Here, we use the already-existing
@@ -1641,6 +1658,14 @@ class KernelDoc:
                         # Hand this line to the appropriate state handler
                         self.state_actions[self.state](self, ln, line)
 
+                    if self.store_src and prev_state != self.state or self.state != state.NORMAL:
+                        if self.state == state.NAME:
+                            # A "/**" was detected. Add a new source element
+                            self.source.append({"ln": ln, "data": line + "\n"})
+                        else:
+                            # Append to the existing one
+                            self.source[-1]["data"] += line + "\n"
+
             self.emit_unused_warnings()
 
         except OSError:
-- 
cgit v1.2.3


From 99ec67a9984fdf38c7ed78695aeb1b99cfee5b50 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 23 Mar 2026 10:10:50 +0100
Subject: docs: kdoc: better handle source when producing YAML output

The current logic was storing symbols source code on a list,
not linked to the actual KdocItem. While this works fine when
kernel-doc markups are OK, on places where there is a "/**"
without a valid kernel-doc markup, it ends that the 1:1 match
between source code and KdocItem doesn't happen, causing
problems to generate the YAML output.

Fix it by storing the source code directly into the KdocItem
structure.

This shouldn't affect performance or memory footprint, except
when --yaml option is used.

While here, add a __repr__() function for KdocItem, as it
helps debugging it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <77902dafabb5c3250486aa2dc1568d5fafa95c5b.1774256269.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_files.py     |   8 +--
 tools/lib/python/kdoc/kdoc_item.py      |   6 +-
 tools/lib/python/kdoc/kdoc_parser.py    | 100 ++++++++++++++++----------------
 tools/lib/python/kdoc/kdoc_yaml_file.py |  28 ++++-----
 tools/unittests/test_kdoc_parser.py     |   9 +++
 5 files changed, 79 insertions(+), 72 deletions(-)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 5a299ed44d62..2428cfc4e843 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -203,10 +203,6 @@ class KernelFiles():
 
         self.results[fname] = entries
 
-        source = doc.get_source()
-        if source:
-            self.source[fname] = source
-
     def process_export_file(self, fname):
         """
         Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.
@@ -294,7 +290,6 @@ class KernelFiles():
 
         self.errors = 0
         self.results = {}
-        self.source = {}
 
         self.files = set()
         self.export_files = set()
@@ -364,8 +359,7 @@ class KernelFiles():
                                           function_table, enable_lineno,
                                           no_doc_sections)
 
-                self.test_file.output_symbols(fname, symbols,
-                                              self.source.get(fname))
+                self.test_file.output_symbols(fname, symbols)
 
                 continue
 
diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
index fe08cac861c2..a7aa6e1e4c1c 100644
--- a/tools/lib/python/kdoc/kdoc_item.py
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -14,7 +14,8 @@ class KdocItem:
     then pass into the output modules.
     """
 
-    def __init__(self, name, fname, type, start_line, **other_stuff):
+    def __init__(self, name, fname, type, start_line,
+                 **other_stuff):
         self.name = name
         self.fname = fname
         self.type = type
@@ -60,6 +61,9 @@ class KdocItem:
     def __getitem__(self, key):
         return self.get(key)
 
+    def __repr__(self):
+        return f"KdocItem({self.name}, {self.fname}, {self.type}, {self.declaration_start_line})"
+
     @classmethod
     def from_dict(cls, d):
         """Create a KdocItem from a plain dict."""
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index a10e64589d76..74af7ae47aa4 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -265,9 +265,6 @@ class KernelDoc:
         # Place all potential outputs into an array
         self.entries = []
 
-        # When store_src is true, the kernel-doc source content is stored here
-        self.source = None
-
         #
         # We need Python 3.7 for its "dicts remember the insertion
         # order" guarantee
@@ -720,13 +717,14 @@ class KernelDoc:
         return declaration
 
 
-    def dump_struct(self, ln, proto):
+    def dump_struct(self, ln, proto, source):
         """
         Store an entry for a ``struct`` or ``union``
         """
         #
         # Do the basic parse to get the pieces of the declaration.
         #
+        source = source
         proto = trim_private_members(proto)
         struct_parts = self.split_struct_proto(proto)
         if not struct_parts:
@@ -756,10 +754,11 @@ class KernelDoc:
                                    declaration_name)
         self.check_sections(ln, declaration_name, decl_type)
         self.output_declaration(decl_type, declaration_name,
+                                source=source,
                                 definition=self.format_struct_decl(declaration),
                                 purpose=self.entry.declaration_purpose)
 
-    def dump_enum(self, ln, proto):
+    def dump_enum(self, ln, proto, source):
         """
         Store an ``enum`` inside self.entries array.
         """
@@ -767,6 +766,7 @@ class KernelDoc:
         # Strip preprocessor directives.  Note that this depends on the
         # trailing semicolon we added in process_proto_type().
         #
+        source = source
         proto = trim_private_members(proto)
         proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
         #
@@ -831,9 +831,10 @@ class KernelDoc:
                               f"Excess enum value '@{k}' description in '{declaration_name}'")
 
         self.output_declaration('enum', declaration_name,
+                                source=source,
                                 purpose=self.entry.declaration_purpose)
 
-    def dump_var(self, ln, proto):
+    def dump_var(self, ln, proto, source):
         """
         Store variables that are part of kAPI.
         """
@@ -846,6 +847,7 @@ class KernelDoc:
         #
         # Store the full prototype before modifying it
         #
+        source = source
         full_proto = proto
         declaration_name = None
 
@@ -895,32 +897,34 @@ class KernelDoc:
             default_val = default_val.lstrip("=").strip()
 
         self.output_declaration("var", declaration_name,
+                                source=source,
                                 full_proto=full_proto,
                                 default_val=default_val,
                                 purpose=self.entry.declaration_purpose)
 
-    def dump_declaration(self, ln, prototype):
+    def dump_declaration(self, ln, prototype, source):
         """
         Store a data declaration inside self.entries array.
         """
 
         if self.entry.decl_type == "enum":
-            self.dump_enum(ln, prototype)
+            self.dump_enum(ln, prototype, source)
         elif self.entry.decl_type == "typedef":
-            self.dump_typedef(ln, prototype)
+            self.dump_typedef(ln, prototype, source)
         elif self.entry.decl_type in ["union", "struct"]:
-            self.dump_struct(ln, prototype)
+            self.dump_struct(ln, prototype, source)
         elif self.entry.decl_type == "var":
-            self.dump_var(ln, prototype)
+            self.dump_var(ln, prototype, source)
         else:
             # This would be a bug
             self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
 
-    def dump_function(self, ln, prototype):
+    def dump_function(self, ln, prototype, source):
         """
         Store a function or function macro inside self.entries array.
         """
 
+        source = source
         found = func_macro = False
         return_type = ''
         decl_type = 'function'
@@ -1013,13 +1017,14 @@ class KernelDoc:
         # Store the result.
         #
         self.output_declaration(decl_type, declaration_name,
+                                source=source,
                                 typedef=('typedef' in return_type),
                                 functiontype=return_type,
                                 purpose=self.entry.declaration_purpose,
                                 func_macro=func_macro)
 
 
-    def dump_typedef(self, ln, proto):
+    def dump_typedef(self, ln, proto, source):
         """
         Store a ``typedef`` inside self.entries array.
         """
@@ -1030,6 +1035,8 @@ class KernelDoc:
         typedef_ident = r'\*?\s*(\w\S+)\s*'
         typedef_args = r'\s*\((.*)\);'
 
+        source = source
+
         typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
         typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
 
@@ -1050,6 +1057,7 @@ class KernelDoc:
             self.create_parameter_list(ln, 'function', args, ',', declaration_name)
 
             self.output_declaration('function', declaration_name,
+                                    source=source,
                                     typedef=True,
                                     functiontype=return_type,
                                     purpose=self.entry.declaration_purpose)
@@ -1067,6 +1075,7 @@ class KernelDoc:
                 return
 
             self.output_declaration('typedef', declaration_name,
+                                    source=source,
                                     purpose=self.entry.declaration_purpose)
             return
 
@@ -1104,7 +1113,7 @@ class KernelDoc:
         function_set.add(symbol)
         return True
 
-    def process_normal(self, ln, line):
+    def process_normal(self, ln, line, source):
         """
         STATE_NORMAL: looking for the ``/**`` to begin everything.
         """
@@ -1118,7 +1127,7 @@ class KernelDoc:
         # next line is always the function name
         self.state = state.NAME
 
-    def process_name(self, ln, line):
+    def process_name(self, ln, line, source):
         """
         STATE_NAME: Looking for the "name - description" line
         """
@@ -1251,7 +1260,7 @@ class KernelDoc:
         return False
 
 
-    def process_decl(self, ln, line):
+    def process_decl(self, ln, line, source):
         """
         STATE_DECLARATION: We've seen the beginning of a declaration.
         """
@@ -1280,7 +1289,7 @@ class KernelDoc:
             self.emit_msg(ln, f"bad line: {line}")
 
 
-    def process_special(self, ln, line):
+    def process_special(self, ln, line, source):
         """
         STATE_SPECIAL_SECTION: a section ending with a blank line.
         """
@@ -1331,7 +1340,7 @@ class KernelDoc:
             # Unknown line, ignore
             self.emit_msg(ln, f"bad line: {line}")
 
-    def process_body(self, ln, line):
+    def process_body(self, ln, line, source):
         """
         STATE_BODY: the bulk of a kerneldoc comment.
         """
@@ -1345,7 +1354,7 @@ class KernelDoc:
             # Unknown line, ignore
             self.emit_msg(ln, f"bad line: {line}")
 
-    def process_inline_name(self, ln, line):
+    def process_inline_name(self, ln, line, source):
         """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
 
         if doc_inline_sect.search(line):
@@ -1363,10 +1372,10 @@ class KernelDoc:
             # Don't let it add partial comments at the code, as breaks the
             # logic meant to remove comments from prototypes.
             #
-            self.process_proto_type(ln, "/**\n" + line)
+            self.process_proto_type(ln, "/**\n" + line, source)
         # else ... ??
 
-    def process_inline_text(self, ln, line):
+    def process_inline_text(self, ln, line, source):
         """STATE_INLINE_TEXT: docbook comments within a prototype."""
 
         if doc_inline_end.search(line):
@@ -1452,7 +1461,7 @@ class KernelDoc:
 
         return proto
 
-    def process_proto_function(self, ln, line):
+    def process_proto_function(self, ln, line, source):
         """Ancillary routine to process a function prototype."""
 
         # strip C99-style comments to end of line
@@ -1494,10 +1503,10 @@ class KernelDoc:
             #
             # ... and we're done
             #
-            self.dump_function(ln, self.entry.prototype)
+            self.dump_function(ln, self.entry.prototype, source)
             self.reset_state(ln)
 
-    def process_proto_type(self, ln, line):
+    def process_proto_type(self, ln, line, source):
         """
         Ancillary routine to process a type.
         """
@@ -1527,7 +1536,7 @@ class KernelDoc:
                 elif chunk == '}':
                     self.entry.brcount -= 1
                 elif chunk == ';' and self.entry.brcount <= 0:
-                    self.dump_declaration(ln, self.entry.prototype)
+                    self.dump_declaration(ln, self.entry.prototype, source)
                     self.reset_state(ln)
                     return
         #
@@ -1536,7 +1545,7 @@ class KernelDoc:
         #
         self.entry.prototype += ' '
 
-    def process_proto(self, ln, line):
+    def process_proto(self, ln, line, source):
         """STATE_PROTO: reading a function/whatever prototype."""
 
         if doc_inline_oneline.search(line):
@@ -1548,17 +1557,18 @@ class KernelDoc:
             self.state = state.INLINE_NAME
 
         elif self.entry.decl_type == 'function':
-            self.process_proto_function(ln, line)
+            self.process_proto_function(ln, line, source)
 
         else:
-            self.process_proto_type(ln, line)
+            self.process_proto_type(ln, line, source)
 
-    def process_docblock(self, ln, line):
+    def process_docblock(self, ln, line, source):
         """STATE_DOCBLOCK: within a ``DOC:`` block."""
 
         if doc_end.search(line):
             self.dump_section()
-            self.output_declaration("doc", self.entry.identifier)
+            self.output_declaration("doc", self.entry.identifier,
+                                    source=source)
             self.reset_state(ln)
 
         elif doc_content.search(line):
@@ -1596,15 +1606,6 @@ class KernelDoc:
         state.DOCBLOCK:			process_docblock,
         }
 
-    def get_source(self):
-        """
-        Return the file content of the lines handled by kernel-doc at the
-        latest parse_kdoc() run.
-
-        Returns none if KernelDoc() was not initialized with store_src,
-        """
-        return self.source
-
     def parse_kdoc(self):
         """
         Open and process each line of a C source file.
@@ -1618,8 +1619,8 @@ class KernelDoc:
         prev = ""
         prev_ln = None
         export_table = set()
-        self.source = []
         self.state = state.NORMAL
+        source = ""
 
         try:
             with open(self.fname, "r", encoding="utf8",
@@ -1646,7 +1647,11 @@ class KernelDoc:
                                           ln, state.name[self.state],
                                           line)
 
-                    prev_state = self.state
+                    if self.store_src:
+                        if source and self.state == state.NORMAL:
+                            source = ""
+                        elif self.state != state.NORMAL:
+                            source += line + "\n"
 
                     # This is an optimization over the original script.
                     # There, when export_file was used for the same file,
@@ -1655,16 +1660,11 @@ class KernelDoc:
                     #
                     if (self.state != state.NORMAL) or \
                        not self.process_export(export_table, line):
+                        prev_state = self.state
                         # Hand this line to the appropriate state handler
-                        self.state_actions[self.state](self, ln, line)
-
-                    if self.store_src and prev_state != self.state or self.state != state.NORMAL:
-                        if self.state == state.NAME:
-                            # A "/**" was detected. Add a new source element
-                            self.source.append({"ln": ln, "data": line + "\n"})
-                        else:
-                            # Append to the existing one
-                            self.source[-1]["data"] += line + "\n"
+                        self.state_actions[self.state](self, ln, line, source)
+                        if prev_state == state.NORMAL and self.state != state.NORMAL:
+                            source += line + "\n"
 
             self.emit_unused_warnings()
 
diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py
index 18737abb1176..1e2ae7c59d70 100644
--- a/tools/lib/python/kdoc/kdoc_yaml_file.py
+++ b/tools/lib/python/kdoc/kdoc_yaml_file.py
@@ -85,7 +85,7 @@ class KDocTestFile():
 
         return d
 
-    def output_symbols(self, fname, symbols, source):
+    def output_symbols(self, fname, symbols):
         """
         Store source, symbols and output strings at self.tests.
         """
@@ -96,16 +96,10 @@ class KDocTestFile():
         kdoc_item = []
         expected = []
 
-        if not symbols and not source:
-            return
-
-        if not source or len(symbols) != len(source):
-            print(f"Warning: lengths are different. Ignoring {fname}")
-
-            # Folding without line numbers is too hard.
-            # The right thing to do here to proceed would be to delete
-            # not-handled source blocks, as len(source) should be bigger
-            # than len(symbols)
+        #
+        # Source code didn't produce any symbol
+        #
+        if not symbols:
             return
 
         base_name = "test_" + fname.replace(".", "_").replace("/", "_")
@@ -115,9 +109,15 @@ class KDocTestFile():
         for i in range(0, len(symbols)):
             arg = symbols[i]
 
-            if "KdocItem" in self.yaml_content:
+            source = arg.get("source", "")
+
+            if arg and "KdocItem" in self.yaml_content:
                 msg = self.get_kdoc_item(arg)
 
+                other_stuff = msg.get("other_stuff", {})
+                if "source" in other_stuff:
+                    del other_stuff["source"]
+
                 expected_dict["kdoc_item"] = msg
 
             for out_style in self.out_style:
@@ -132,9 +132,9 @@ class KDocTestFile():
 
             test = {
                 "name": name,
-                "description": f"{fname} line {source[i]["ln"]}",
+                "description": f"{fname} line {arg.declaration_start_line}",
                 "fname": fname,
-                "source": source[i]["data"],
+                "source": source,
                 "expected": [expected_dict]
             }
 
diff --git a/tools/unittests/test_kdoc_parser.py b/tools/unittests/test_kdoc_parser.py
index f2250ef192ce..c4a76ed13dbc 100755
--- a/tools/unittests/test_kdoc_parser.py
+++ b/tools/unittests/test_kdoc_parser.py
@@ -167,7 +167,16 @@ class GenerateKdocItem(unittest.TestCase):
                 self.assertIsInstance(entry, KdocItem)
 
                 d = vars(entry)
+
+                other_stuff = d.get("other_stuff", {})
+                if "source" in other_stuff:
+                    del other_stuff["source"]
+
                 for key, value in expected.items():
+                    if key == "other_stuff":
+                        if "source" in value:
+                            del value["source"]
+
                     result = clean_whitespc(d[key], relax_whitespace)
                     value = clean_whitespc(value, relax_whitespace)
 
-- 
cgit v1.2.3


From 6fa6b5cb60490db2591bb93872b95f72315e5f53 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Sat, 18 Apr 2026 12:21:37 -0700
Subject: docs: kdoc: Expand 'at_least' when creating parameter list

sphinx doesn't know that the kernel headers do:

    #define at_least static

Do this replacement before declarations are passed to it.

This prevents errors like the following from appearing once the
lib/crypto/ kernel-doc is wired up to the sphinx build:

   linux/Documentation/crypto/libcrypto:128: ./include/crypto/sha2.h:773: WARNING: Error in declarator or parameters
Error in declarator or parameters
Invalid C declaration: Expected ']' in end of array operator. [error at 59]
  void sha512_final (struct sha512_ctx *ctx, u8 out[at_least SHA512_DIGEST_SIZE])

Acked-by: Jonathan Corbet <corbet@lwn.net>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20260418192138.15556-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools/lib/python/kdoc/kdoc_parser.py')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index ca00695b47b3..901e02e3c043 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -571,6 +571,11 @@ class KernelDoc:
             # Ignore argument attributes
             arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
 
+            # Replace '[at_least ' with '[static '.  This allows sphinx to parse
+            # array parameter declarations like 'char A[at_least 4]', where
+            # 'at_least' is #defined to 'static' by the kernel headers.
+            arg = arg.replace('[at_least ', '[static ')
+
             # Strip leading/trailing spaces
             arg = arg.strip()
             arg = KernRe(r'\s+').sub(' ', arg, count=1)
-- 
cgit v1.2.3