From fc973dcd73f242480c61eccb1aa7306adafd2907 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 29 Jul 2025 18:43:03 +0200 Subject: docs: kernel-doc: avoid script crash on ancient Python While we do need at least 3.6 for kernel-doc to work, and at least 3.7 for it to output functions and structs with parameters at the right order, let the python binary be compatible with legacy versions. The rationale is that the Kernel build nowadays calls kernel-doc with -none on some places. Better not to bail out when older versions are found. With that, potentially this will run with python 2.7 and 3.2+, according with vermin: $ vermin --no-tips -v ./scripts/kernel-doc Detecting python files.. Analyzing using 24 processes.. 2.7, 3.2 /new_devel/v4l/docs/scripts/kernel-doc Minimum required versions: 2.7, 3.2 3.2 minimal requirement is due to argparse. The minimal version I could check was version 3.4 (using anaconda). Anaconda doesn't support 3.2 or 3.3 anymore, and 3.2 doesn't even compile (I tested compiling Python 3.2 on Fedora 42 and on Fedora 32 - no show). With 3.4, the script didn't crash and emitted the right warning: $ conda create -n py34 python=3.4 $ conda activate py34 python --version Python 3.4.5 $ python ./scripts/kernel-doc --none include/media Error: Python 3.6 or later is required by kernel-doc $ conda deactivate $ python --version Python 3.13.5 $ python ./scripts/kernel-doc --none include/media (no warnings and script ran properly) Supporting 2.7 is out of scope, as it is EOL for 5 years, and changing shebang to point to "python" instead of "python3" would have a wider impact. I did some extra checks about the differences from 3.2 and 3.4, and didn't find anything that would cause troubles: grep -rE "yield from|asyncio|pathlib|async|await|enum" scripts/kernel-doc Also, it doesn't use "@" operator. So, I'm confident that it should run (producing the exit warning) since Python 3.2. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/87d55e76b0b1391cb7a83e3e965dbddb83fa9786.1753806485.git.mchehab+huawei@kernel.org --- scripts/kernel-doc.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'scripts/kernel-doc.py') diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index fc3d46ef519f..d9fe2bcbd39c 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -2,8 +2,17 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright(c) 2025: Mauro Carvalho Chehab . # -# pylint: disable=C0103,R0915 -# +# pylint: disable=C0103,R0912,R0914,R0915 + +# NOTE: While kernel-doc requires at least version 3.6 to run, the +# command line should work with Python 3.2+ (tested with 3.4). +# The rationale is that it shall fail gracefully during Kernel +# compilation with older Kernel versions. Due to that: +# - encoding line is needed here; +# - no f-strings can be used on this file. +# - the libraries that require newer versions can only be included +# after Python version is checked. + # Converted from the kernel-doc script originally written in Perl # under GPLv2, copyrighted since 1998 by the following authors: # @@ -107,9 +116,6 @@ SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) -from kdoc_files import KernelFiles # pylint: disable=C0413 -from kdoc_output import RestFormat, ManFormat # pylint: disable=C0413 - DESC = """ Read C language source or header FILEs, extract embedded documentation comments, and print formatted documentation to standard output. @@ -273,14 +279,22 @@ def main(): python_ver = sys.version_info[:2] if python_ver < (3,6): - logger.warning("Python 3.6 or later is required by kernel-doc") + # Depending on Kernel configuration, kernel-doc --none is called at + # build time. As we don't want to break compilation due to the + # usage of an old Python version, return 0 here. + if args.none: + logger.error("Python 3.6 or later is required by kernel-doc. skipping checks") + sys.exit(0) - # Return 0 here to avoid breaking compilation - sys.exit(0) + sys.exit("Python 3.6 or later is required by kernel-doc. Aborting.") if python_ver < (3,7): logger.warning("Python 3.7 or later is required for correct results") + # Import kernel-doc libraries only after checking Python version + from kdoc_files import KernelFiles # pylint: disable=C0415 + from kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 + if args.man: out_style = ManFormat(modulename=args.modulename) elif args.none: @@ -308,11 +322,11 @@ def main(): sys.exit(0) if args.werror: - print(f"{error_count} warnings as errors") + print("%s warnings as errors" % error_count) # pylint: disable=C0209 sys.exit(error_count) if args.verbose: - print(f"{error_count} errors") + print("%s errors" % error_count) # pylint: disable=C0209 if args.none: sys.exit(0) -- cgit v1.2.3 From 778b8ebe5192e7a7f00563a7456517dfa63e1d90 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 10 Nov 2025 15:04:29 -0700 Subject: docs: Move the python libraries to tools/lib/python "scripts/lib" was always a bit of an awkward place for Python modules. We already have tools/lib; create a tools/lib/python, move the libraries there, and update the users accordingly. While at it, move the contents of tools/docs/lib. Rather than make another directory, just put these documentation-oriented modules under "kdoc". Signed-off-by: Jonathan Corbet Message-ID: <20251110220430.726665-2-corbet@lwn.net> --- .pylintrc | 2 +- Documentation/Makefile | 2 +- Documentation/sphinx/kernel_abi.py | 2 +- Documentation/sphinx/kernel_include.py | 2 +- Documentation/sphinx/kerneldoc.py | 2 +- MAINTAINERS | 3 +- scripts/jobserver-exec | 2 +- scripts/kernel-doc.py | 2 +- scripts/lib/abi/abi_parser.py | 628 ---------- scripts/lib/abi/abi_regex.py | 234 ---- scripts/lib/abi/helpers.py | 38 - scripts/lib/abi/system_symbols.py | 378 ------ scripts/lib/jobserver.py | 149 --- scripts/lib/kdoc/kdoc_files.py | 294 ----- scripts/lib/kdoc/kdoc_item.py | 43 - scripts/lib/kdoc/kdoc_output.py | 824 ------------- scripts/lib/kdoc/kdoc_parser.py | 1667 --------------------------- scripts/lib/kdoc/kdoc_re.py | 270 ----- tools/docs/check-variable-fonts.py | 8 +- tools/docs/get_abi.py | 2 +- tools/docs/lib/__init__.py | 0 tools/docs/lib/enrich_formatter.py | 70 -- tools/docs/lib/latex_fonts.py | 167 --- tools/docs/lib/parse_data_structs.py | 482 -------- tools/docs/lib/python_version.py | 178 --- tools/docs/parse-headers.py | 9 +- tools/docs/sphinx-build-wrapper | 7 +- tools/docs/sphinx-pre-install | 5 +- tools/lib/python/abi/abi_parser.py | 628 ++++++++++ tools/lib/python/abi/abi_regex.py | 234 ++++ tools/lib/python/abi/helpers.py | 38 + tools/lib/python/abi/system_symbols.py | 378 ++++++ tools/lib/python/jobserver.py | 149 +++ tools/lib/python/kdoc/enrich_formatter.py | 70 ++ tools/lib/python/kdoc/kdoc_files.py | 294 +++++ tools/lib/python/kdoc/kdoc_item.py | 43 + tools/lib/python/kdoc/kdoc_output.py | 824 +++++++++++++ tools/lib/python/kdoc/kdoc_parser.py | 1667 +++++++++++++++++++++++++++ tools/lib/python/kdoc/kdoc_re.py | 270 +++++ tools/lib/python/kdoc/latex_fonts.py | 167 +++ tools/lib/python/kdoc/parse_data_structs.py | 482 ++++++++ tools/lib/python/kdoc/python_version.py | 178 +++ 42 files changed, 5451 insertions(+), 5441 deletions(-) delete mode 100644 scripts/lib/abi/abi_parser.py delete mode 100644 scripts/lib/abi/abi_regex.py delete mode 100644 scripts/lib/abi/helpers.py delete mode 100644 scripts/lib/abi/system_symbols.py delete mode 100755 scripts/lib/jobserver.py delete mode 100644 scripts/lib/kdoc/kdoc_files.py delete mode 100644 scripts/lib/kdoc/kdoc_item.py delete mode 100644 scripts/lib/kdoc/kdoc_output.py delete mode 100644 scripts/lib/kdoc/kdoc_parser.py delete mode 100644 scripts/lib/kdoc/kdoc_re.py delete mode 100644 tools/docs/lib/__init__.py delete mode 100644 tools/docs/lib/enrich_formatter.py delete mode 100755 tools/docs/lib/latex_fonts.py delete mode 100755 tools/docs/lib/parse_data_structs.py delete mode 100644 tools/docs/lib/python_version.py create mode 100644 tools/lib/python/abi/abi_parser.py create mode 100644 tools/lib/python/abi/abi_regex.py create mode 100644 tools/lib/python/abi/helpers.py create mode 100644 tools/lib/python/abi/system_symbols.py create mode 100755 tools/lib/python/jobserver.py create mode 100644 tools/lib/python/kdoc/enrich_formatter.py create mode 100644 tools/lib/python/kdoc/kdoc_files.py create mode 100644 tools/lib/python/kdoc/kdoc_item.py create mode 100644 tools/lib/python/kdoc/kdoc_output.py create mode 100644 tools/lib/python/kdoc/kdoc_parser.py create mode 100644 tools/lib/python/kdoc/kdoc_re.py create mode 100755 tools/lib/python/kdoc/latex_fonts.py create mode 100755 tools/lib/python/kdoc/parse_data_structs.py create mode 100644 tools/lib/python/kdoc/python_version.py (limited to 'scripts/kernel-doc.py') diff --git a/.pylintrc b/.pylintrc index 89eaf2100edd..8c6fc2b628b3 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,2 +1,2 @@ [MASTER] -init-hook='import sys; sys.path += ["scripts/lib/kdoc", "scripts/lib/abi", "tools/docs/lib"]' +init-hook='import sys; sys.path += ["tools/lib/python"]' diff --git a/Documentation/Makefile b/Documentation/Makefile index c66df29cf0a3..fda2bef8d9d8 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -115,6 +115,6 @@ dochelp: @echo ' make PAPER={a4|letter} Specifies the paper size used for LaTeX/PDF output.' @echo @echo ' make FONTS_CONF_DENY_VF={path} sets a deny list to block variable Noto CJK fonts' - @echo ' for PDF build. See tools/docs/lib/latex_fonts.py for more details' + @echo ' for PDF build. See tools/lib/python/kdoc/latex_fonts.py for more details' @echo @echo ' Default location for the generated documents is Documentation/output' diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index 32e39fb8bc3b..7ec832da8444 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -43,7 +43,7 @@ from sphinx.util.docutils import switch_source_input from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "scripts/lib/abi")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python/abi")) from abi_parser import AbiParser diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py index 75e139287d50..a12455daa6d7 100755 --- a/Documentation/sphinx/kernel_include.py +++ b/Documentation/sphinx/kernel_include.py @@ -97,7 +97,7 @@ from docutils.parsers.rst.directives.body import CodeBlock, NumberLines from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/docs/lib")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc")) from parse_data_structs import ParseDataStructs diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index 2586b4d4e494..56f382a6bdf1 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -42,7 +42,7 @@ from sphinx.util import logging from pprint import pformat srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "scripts/lib/kdoc")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc")) from kdoc_files import KernelFiles from kdoc_output import RestFormat diff --git a/MAINTAINERS b/MAINTAINERS index 8a9411e5c1e1..efe98e680c14 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7412,8 +7412,7 @@ P: Documentation/doc-guide/maintainer-profile.rst T: git git://git.lwn.net/linux.git docs-next F: Documentation/ F: scripts/kernel-doc* -F: scripts/lib/abi/* -F: scripts/lib/kdoc/* +F: tools/lib/python/* F: tools/docs/ F: tools/net/ynl/pyynl/lib/doc_generator.py X: Documentation/ABI/ diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index ae23afd344ec..758e947a6fb9 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -13,7 +13,7 @@ See: import os import sys -LIB_DIR = "lib" +LIB_DIR = "../tools/lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index d9fe2bcbd39c..bb24bbf73167 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -111,7 +111,7 @@ import sys # Import Python modules -LIB_DIR = "lib/kdoc" +LIB_DIR = "../tools/lib/python/kdoc" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) diff --git a/scripts/lib/abi/abi_parser.py b/scripts/lib/abi/abi_parser.py deleted file mode 100644 index 66a738013ce1..000000000000 --- a/scripts/lib/abi/abi_parser.py +++ /dev/null @@ -1,628 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Parse ABI documentation and produce results from it. -""" - -from argparse import Namespace -import logging -import os -import re - -from pprint import pformat -from random import randrange, seed - -# Import Python modules - -from helpers import AbiDebug, ABI_DIR - - -class AbiParser: - """Main class to parse ABI files""" - - TAGS = r"(what|where|date|kernelversion|contact|description|users)" - XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)" - - def __init__(self, directory, logger=None, - enable_lineno=False, show_warnings=True, debug=0): - """Stores arguments for the class and initialize class vars""" - - self.directory = directory - self.enable_lineno = enable_lineno - self.show_warnings = show_warnings - self.debug = debug - - if not logger: - self.log = logging.getLogger("get_abi") - else: - self.log = logger - - self.data = {} - self.what_symbols = {} - self.file_refs = {} - self.what_refs = {} - - # Ignore files that contain such suffixes - self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~") - - # Regular expressions used on parser - self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR) - self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I) - self.re_valid = re.compile(self.TAGS) - self.re_start_spc = re.compile(r"(\s*)(\S.*)") - self.re_whitespace = re.compile(r"^\s+") - - # Regular used on print - self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})") - self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])") - self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)") - self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n") - self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst") - self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)") - self.re_xref_node = re.compile(self.XREF) - - def warn(self, fdata, msg, extra=None): - """Displays a parse error if warning is enabled""" - - if not self.show_warnings: - return - - msg = f"{fdata.fname}:{fdata.ln}: {msg}" - if extra: - msg += "\n\t\t" + extra - - self.log.warning(msg) - - def add_symbol(self, what, fname, ln=None, xref=None): - """Create a reference table describing where each 'what' is located""" - - if what not in self.what_symbols: - self.what_symbols[what] = {"file": {}} - - if fname not in self.what_symbols[what]["file"]: - self.what_symbols[what]["file"][fname] = [] - - if ln and ln not in self.what_symbols[what]["file"][fname]: - self.what_symbols[what]["file"][fname].append(ln) - - if xref: - self.what_symbols[what]["xref"] = xref - - def _parse_line(self, fdata, line): - """Parse a single line of an ABI file""" - - new_what = False - new_tag = False - content = None - - match = self.re_tag.match(line) - if match: - new = match.group(1).lower() - sep = match.group(2) - content = match.group(3) - - match = self.re_valid.search(new) - if match: - new_tag = match.group(1) - else: - if fdata.tag == "description": - # New "tag" is actually part of description. - # Don't consider it a tag - new_tag = False - elif fdata.tag != "": - self.warn(fdata, f"tag '{fdata.tag}' is invalid", line) - - if new_tag: - # "where" is Invalid, but was a common mistake. Warn if found - if new_tag == "where": - self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead") - new_tag = "what" - - if new_tag == "what": - fdata.space = None - - if content not in self.what_symbols: - self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln) - - if fdata.tag == "what": - fdata.what.append(content.strip("\n")) - else: - if fdata.key: - if "description" not in self.data.get(fdata.key, {}): - self.warn(fdata, f"{fdata.key} doesn't have a description") - - for w in fdata.what: - self.add_symbol(what=w, fname=fdata.fname, - ln=fdata.what_ln, xref=fdata.key) - - fdata.label = content - new_what = True - - key = "abi_" + content.lower() - fdata.key = self.re_unprintable.sub("_", key).strip("_") - - # Avoid duplicated keys but using a defined seed, to make - # the namespace identical if there aren't changes at the - # ABI symbols - seed(42) - - while fdata.key in self.data: - char = randrange(0, 51) + ord("A") - if char > ord("Z"): - char += ord("a") - ord("Z") - 1 - - fdata.key += chr(char) - - if fdata.key and fdata.key not in self.data: - self.data[fdata.key] = { - "what": [content], - "file": [fdata.file_ref], - "path": fdata.ftype, - "line_no": fdata.ln, - } - - fdata.what = self.data[fdata.key]["what"] - - self.what_refs[content] = fdata.key - fdata.tag = new_tag - fdata.what_ln = fdata.ln - - if fdata.nametag["what"]: - t = (content, fdata.key) - if t not in fdata.nametag["symbols"]: - fdata.nametag["symbols"].append(t) - - return - - if fdata.tag and new_tag: - fdata.tag = new_tag - - if new_what: - fdata.label = "" - - if "description" in self.data[fdata.key]: - self.data[fdata.key]["description"] += "\n\n" - - if fdata.file_ref not in self.data[fdata.key]["file"]: - self.data[fdata.key]["file"].append(fdata.file_ref) - - if self.debug == AbiDebug.WHAT_PARSING: - self.log.debug("what: %s", fdata.what) - - if not fdata.what: - self.warn(fdata, "'What:' should come first:", line) - return - - if new_tag == "description": - fdata.space = None - - if content: - sep = sep.replace(":", " ") - - c = " " * len(new_tag) + sep + content - c = c.expandtabs() - - match = self.re_start_spc.match(c) - if match: - # Preserve initial spaces for the first line - fdata.space = match.group(1) - content = match.group(2) + "\n" - - self.data[fdata.key][fdata.tag] = content - - return - - # Store any contents before tags at the database - if not fdata.tag and "what" in fdata.nametag: - fdata.nametag["description"] += line - return - - if fdata.tag == "description": - content = line.expandtabs() - - if self.re_whitespace.sub("", content) == "": - self.data[fdata.key][fdata.tag] += "\n" - return - - if fdata.space is None: - match = self.re_start_spc.match(content) - if match: - # Preserve initial spaces for the first line - fdata.space = match.group(1) - - content = match.group(2) + "\n" - else: - if content.startswith(fdata.space): - content = content[len(fdata.space):] - - else: - fdata.space = "" - - if fdata.tag == "what": - w = content.strip("\n") - if w: - self.data[fdata.key][fdata.tag].append(w) - else: - self.data[fdata.key][fdata.tag] += content - return - - content = line.strip() - if fdata.tag: - if fdata.tag == "what": - w = content.strip("\n") - if w: - self.data[fdata.key][fdata.tag].append(w) - else: - self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n") - return - - # Everything else is error - if content: - self.warn(fdata, "Unexpected content", line) - - def parse_readme(self, nametag, fname): - """Parse ABI README file""" - - nametag["what"] = ["Introduction"] - nametag["path"] = "README" - with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: - for line in fp: - match = self.re_tag.match(line) - if match: - new = match.group(1).lower() - - match = self.re_valid.search(new) - if match: - nametag["description"] += "\n:" + line - continue - - nametag["description"] += line - - def parse_file(self, fname, path, basename): - """Parse a single file""" - - ref = f"abi_file_{path}_{basename}" - ref = self.re_unprintable.sub("_", ref).strip("_") - - # Store per-file state into a namespace variable. This will be used - # by the per-line parser state machine and by the warning function. - fdata = Namespace - - fdata.fname = fname - fdata.name = basename - - pos = fname.find(ABI_DIR) - if pos > 0: - f = fname[pos:] - else: - f = fname - - fdata.file_ref = (f, ref) - self.file_refs[f] = ref - - fdata.ln = 0 - fdata.what_ln = 0 - fdata.tag = "" - fdata.label = "" - fdata.what = [] - fdata.key = None - fdata.xrefs = None - fdata.space = None - fdata.ftype = path.split("/")[0] - - fdata.nametag = {} - fdata.nametag["what"] = [f"ABI file {path}/{basename}"] - fdata.nametag["type"] = "File" - fdata.nametag["path"] = fdata.ftype - fdata.nametag["file"] = [fdata.file_ref] - fdata.nametag["line_no"] = 1 - fdata.nametag["description"] = "" - fdata.nametag["symbols"] = [] - - self.data[ref] = fdata.nametag - - if self.debug & AbiDebug.WHAT_OPEN: - self.log.debug("Opening file %s", fname) - - if basename == "README": - self.parse_readme(fdata.nametag, fname) - return - - with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: - for line in fp: - fdata.ln += 1 - - self._parse_line(fdata, line) - - if "description" in fdata.nametag: - fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n") - - if fdata.key: - if "description" not in self.data.get(fdata.key, {}): - self.warn(fdata, f"{fdata.key} doesn't have a description") - - for w in fdata.what: - self.add_symbol(what=w, fname=fname, xref=fdata.key) - - def _parse_abi(self, root=None): - """Internal function to parse documentation ABI recursively""" - - if not root: - root = self.directory - - with os.scandir(root) as obj: - for entry in obj: - name = os.path.join(root, entry.name) - - if entry.is_dir(): - self._parse_abi(name) - continue - - if not entry.is_file(): - continue - - basename = os.path.basename(name) - - if basename.startswith("."): - continue - - if basename.endswith(self.ignore_suffixes): - continue - - path = self.re_abi_dir.sub("", os.path.dirname(name)) - - self.parse_file(name, path, basename) - - def parse_abi(self, root=None): - """Parse documentation ABI""" - - self._parse_abi(root) - - if self.debug & AbiDebug.DUMP_ABI_STRUCTS: - self.log.debug(pformat(self.data)) - - def desc_txt(self, desc): - """Print description as found inside ABI files""" - - desc = desc.strip(" \t\n") - - return desc + "\n\n" - - def xref(self, fname): - """ - Converts a Documentation/ABI + basename into a ReST cross-reference - """ - - xref = self.file_refs.get(fname) - if not xref: - return None - else: - return xref - - def desc_rst(self, desc): - """Enrich ReST output by creating cross-references""" - - # Remove title markups from the description - # Having titles inside ABI files will only work if extra - # care would be taken in order to strictly follow the same - # level order for each markup. - desc = self.re_title_mark.sub("\n\n", "\n" + desc) - desc = desc.rstrip(" \t\n").lstrip("\n") - - # Python's regex performance for non-compiled expressions is a lot - # than Perl, as Perl automatically caches them at their - # first usage. Here, we'll need to do the same, as otherwise the - # performance penalty is be high - - new_desc = "" - for d in desc.split("\n"): - if d == "": - new_desc += "\n" - continue - - # Use cross-references for doc files where needed - d = self.re_doc.sub(r":doc:`/\1`", d) - - # Use cross-references for ABI generated docs where needed - matches = self.re_abi.findall(d) - for m in matches: - abi = m[0] + m[1] - - xref = self.file_refs.get(abi) - if not xref: - # This may happen if ABI is on a separate directory, - # like parsing ABI testing and symbol is at stable. - # The proper solution is to move this part of the code - # for it to be inside sphinx/kernel_abi.py - self.log.info("Didn't find ABI reference for '%s'", abi) - else: - new = self.re_escape.sub(r"\\\1", m[1]) - d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d) - - # Seek for cross reference symbols like /sys/... - # Need to be careful to avoid doing it on a code block - if d[0] not in [" ", "\t"]: - matches = self.re_xref_node.findall(d) - for m in matches: - # Finding ABI here is more complex due to wildcards - xref = self.what_refs.get(m) - if xref: - new = self.re_escape.sub(r"\\\1", m) - d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d) - - new_desc += d + "\n" - - return new_desc + "\n\n" - - def doc(self, output_in_txt=False, show_symbols=True, show_file=True, - filter_path=None): - """Print ABI at stdout""" - - part = None - for key, v in sorted(self.data.items(), - key=lambda x: (x[1].get("type", ""), - x[1].get("what"))): - - wtype = v.get("type", "Symbol") - file_ref = v.get("file") - names = v.get("what", [""]) - - if wtype == "File": - if not show_file: - continue - else: - if not show_symbols: - continue - - if filter_path: - if v.get("path") != filter_path: - continue - - msg = "" - - if wtype != "File": - cur_part = names[0] - if cur_part.find("/") >= 0: - match = self.re_what.match(cur_part) - if match: - symbol = match.group(1).rstrip("/") - cur_part = "Symbols under " + symbol - - if cur_part and cur_part != part: - part = cur_part - msg += part + "\n"+ "-" * len(part) +"\n\n" - - msg += f".. _{key}:\n\n" - - max_len = 0 - for i in range(0, len(names)): # pylint: disable=C0200 - names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**" - - max_len = max(max_len, len(names[i])) - - msg += "+-" + "-" * max_len + "-+\n" - for name in names: - msg += f"| {name}" + " " * (max_len - len(name)) + " |\n" - msg += "+-" + "-" * max_len + "-+\n" - msg += "\n" - - for ref in file_ref: - if wtype == "File": - msg += f".. _{ref[1]}:\n\n" - else: - base = os.path.basename(ref[0]) - msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n" - - if wtype == "File": - msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n" - - desc = v.get("description") - if not desc and wtype != "File": - msg += f"DESCRIPTION MISSING for {names[0]}\n\n" - - if desc: - if output_in_txt: - msg += self.desc_txt(desc) - else: - msg += self.desc_rst(desc) - - symbols = v.get("symbols") - if symbols: - msg += "Has the following ABI:\n\n" - - for w, label in symbols: - # Escape special chars from content - content = self.re_escape.sub(r"\\\1", w) - - msg += f"- :ref:`{content} <{label}>`\n\n" - - users = v.get("users") - if users and users.strip(" \t\n"): - users = users.strip("\n").replace('\n', '\n\t') - msg += f"Users:\n\t{users}\n\n" - - ln = v.get("line_no", 1) - - yield (msg, file_ref[0][0], ln) - - def check_issues(self): - """Warn about duplicated ABI entries""" - - for what, v in self.what_symbols.items(): - files = v.get("file") - if not files: - # Should never happen if the parser works properly - self.log.warning("%s doesn't have a file associated", what) - continue - - if len(files) == 1: - continue - - f = [] - for fname, lines in sorted(files.items()): - if not lines: - f.append(f"{fname}") - elif len(lines) == 1: - f.append(f"{fname}:{lines[0]}") - else: - m = fname + "lines " - m += ", ".join(str(x) for x in lines) - f.append(m) - - self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f)) - - def search_symbols(self, expr): - """ Searches for ABI symbols """ - - regex = re.compile(expr, re.I) - - found_keys = 0 - for t in sorted(self.data.items(), key=lambda x: [0]): - v = t[1] - - wtype = v.get("type", "") - if wtype == "File": - continue - - for what in v.get("what", [""]): - if regex.search(what): - found_keys += 1 - - kernelversion = v.get("kernelversion", "").strip(" \t\n") - date = v.get("date", "").strip(" \t\n") - contact = v.get("contact", "").strip(" \t\n") - users = v.get("users", "").strip(" \t\n") - desc = v.get("description", "").strip(" \t\n") - - files = [] - for f in v.get("file", ()): - files.append(f[0]) - - what = str(found_keys) + ". " + what - title_tag = "-" * len(what) - - print(f"\n{what}\n{title_tag}\n") - - if kernelversion: - print(f"Kernel version:\t\t{kernelversion}") - - if date: - print(f"Date:\t\t\t{date}") - - if contact: - print(f"Contact:\t\t{contact}") - - if users: - print(f"Users:\t\t\t{users}") - - print("Defined on file(s):\t" + ", ".join(files)) - - if desc: - desc = desc.strip("\n") - print(f"\n{desc}\n") - - if not found_keys: - print(f"Regular expression /{expr}/ not found.") diff --git a/scripts/lib/abi/abi_regex.py b/scripts/lib/abi/abi_regex.py deleted file mode 100644 index 8a57846cbc69..000000000000 --- a/scripts/lib/abi/abi_regex.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python3 -# xxpylint: disable=R0903 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Convert ABI what into regular expressions -""" - -import re -import sys - -from pprint import pformat - -from abi_parser import AbiParser -from helpers import AbiDebug - -class AbiRegex(AbiParser): - """Extends AbiParser to search ABI nodes with regular expressions""" - - # Escape only ASCII visible characters - escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])" - leave_others = "others" - - # Tuples with regular expressions to be compiled and replacement data - re_whats = [ - # Drop escape characters that might exist - (re.compile("\\\\"), ""), - - # Temporarily escape dot characters - (re.compile(r"\."), "\xf6"), - - # Temporarily change [0-9]+ type of patterns - (re.compile(r"\[0\-9\]\+"), "\xff"), - - # Temporarily change [\d+-\d+] type of patterns - (re.compile(r"\[0\-\d+\]"), "\xff"), - (re.compile(r"\[0:\d+\]"), "\xff"), - (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"), - - # Temporarily change [0-9] type of patterns - (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"), - - # Handle multiple option patterns - (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"), - - # Handle wildcards - (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"), - (re.compile(r"/\*/"), "/.*/"), - (re.compile(r"/\xf6\xf6\xf6"), "/.*"), - (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"), - (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"), - (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"), - - (re.compile(r"XX+"), "\\\\w\xf7"), - (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"), - (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"), - (re.compile(r"_[AB]_"), "_\\\\w\xf7_"), - - # Recover [0-9] type of patterns - (re.compile(r"\xf4"), "["), - (re.compile(r"\xf5"), "]"), - - # Remove duplicated spaces - (re.compile(r"\s+"), r" "), - - # Special case: drop comparison as in: - # What: foo = - # (this happens on a few IIO definitions) - (re.compile(r"\s*\=.*$"), ""), - - # Escape all other symbols - (re.compile(escape_symbols), r"\\\1"), - (re.compile(r"\\\\"), r"\\"), - (re.compile(r"\\([\[\]\(\)\|])"), r"\1"), - (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"), - - (re.compile(r"\xff"), r"\\d+"), - - # Special case: IIO ABI which a parenthesis. - (re.compile(r"sqrt(.*)"), r"sqrt(.*)"), - - # Simplify regexes with multiple .* - (re.compile(r"(?:\.\*){2,}"), ""), - - # Recover dot characters - (re.compile(r"\xf6"), "\\."), - # Recover plus characters - (re.compile(r"\xf7"), "+"), - ] - re_has_num = re.compile(r"\\d") - - # Symbol name after escape_chars that are considered a devnode basename - re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$") - - # List of popular group names to be skipped to minimize regex group size - # Use AbiDebug.SUBGROUP_SIZE to detect those - skip_names = set(["devices", "hwmon"]) - - def regex_append(self, what, new): - """ - Get a search group for a subset of regular expressions. - - As ABI may have thousands of symbols, using a for to search all - regular expressions is at least O(n^2). When there are wildcards, - the complexity increases substantially, eventually becoming exponential. - - To avoid spending too much time on them, use a logic to split - them into groups. The smaller the group, the better, as it would - mean that searches will be confined to a small number of regular - expressions. - - The conversion to a regex subset is tricky, as we need something - that can be easily obtained from the sysfs symbol and from the - regular expression. So, we need to discard nodes that have - wildcards. - - If it can't obtain a subgroup, place the regular expression inside - a special group (self.leave_others). - """ - - search_group = None - - for search_group in reversed(new.split("/")): - if not search_group or search_group in self.skip_names: - continue - if self.re_symbol_name.match(search_group): - break - - if not search_group: - search_group = self.leave_others - - if self.debug & AbiDebug.SUBGROUP_MAP: - self.log.debug("%s: mapped as %s", what, search_group) - - try: - if search_group not in self.regex_group: - self.regex_group[search_group] = [] - - self.regex_group[search_group].append(re.compile(new)) - if self.search_string: - if what.find(self.search_string) >= 0: - print(f"What: {what}") - except re.PatternError: - self.log.warning("Ignoring '%s' as it produced an invalid regex:\n" - " '%s'", what, new) - - def get_regexes(self, what): - """ - Given an ABI devnode, return a list of all regular expressions that - may match it, based on the sub-groups created by regex_append() - """ - - re_list = [] - - patches = what.split("/") - patches.reverse() - patches.append(self.leave_others) - - for search_group in patches: - if search_group in self.regex_group: - re_list += self.regex_group[search_group] - - return re_list - - def __init__(self, *args, **kwargs): - """ - Override init method to get verbose argument - """ - - self.regex_group = None - self.search_string = None - self.re_string = None - - if "search_string" in kwargs: - self.search_string = kwargs.get("search_string") - del kwargs["search_string"] - - if self.search_string: - - try: - self.re_string = re.compile(self.search_string) - except re.PatternError as e: - msg = f"{self.search_string} is not a valid regular expression" - raise ValueError(msg) from e - - super().__init__(*args, **kwargs) - - def parse_abi(self, *args, **kwargs): - - super().parse_abi(*args, **kwargs) - - self.regex_group = {} - - print("Converting ABI What fields into regexes...", file=sys.stderr) - - for t in sorted(self.data.items(), key=lambda x: x[0]): - v = t[1] - if v.get("type") == "File": - continue - - v["regex"] = [] - - for what in v.get("what", []): - if not what.startswith("/sys"): - continue - - new = what - for r, s in self.re_whats: - try: - new = r.sub(s, new) - except re.PatternError as e: - # Help debugging troubles with new regexes - raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e - - v["regex"].append(new) - - if self.debug & AbiDebug.REGEX: - self.log.debug("%-90s <== %s", new, what) - - # Store regex into a subgroup to speedup searches - self.regex_append(what, new) - - if self.debug & AbiDebug.SUBGROUP_DICT: - self.log.debug("%s", pformat(self.regex_group)) - - if self.debug & AbiDebug.SUBGROUP_SIZE: - biggestd_keys = sorted(self.regex_group.keys(), - key= lambda k: len(self.regex_group[k]), - reverse=True) - - print("Top regex subgroups:", file=sys.stderr) - for k in biggestd_keys[:10]: - print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr) diff --git a/scripts/lib/abi/helpers.py b/scripts/lib/abi/helpers.py deleted file mode 100644 index 639b23e4ca33..000000000000 --- a/scripts/lib/abi/helpers.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# pylint: disable=R0903 -# SPDX-License-Identifier: GPL-2.0 - -""" -Helper classes for ABI parser -""" - -ABI_DIR = "Documentation/ABI/" - - -class AbiDebug: - """Debug levels""" - - WHAT_PARSING = 1 - WHAT_OPEN = 2 - DUMP_ABI_STRUCTS = 4 - UNDEFINED = 8 - REGEX = 16 - SUBGROUP_MAP = 32 - SUBGROUP_DICT = 64 - SUBGROUP_SIZE = 128 - GRAPH = 256 - - -DEBUG_HELP = """ -1 - enable debug parsing logic -2 - enable debug messages on file open -4 - enable debug for ABI parse data -8 - enable extra debug information to identify troubles - with ABI symbols found at the local machine that - weren't found on ABI documentation (used only for - undefined subcommand) -16 - enable debug for what to regex conversion -32 - enable debug for symbol regex subgroups -64 - enable debug for sysfs graph tree variable -""" diff --git a/scripts/lib/abi/system_symbols.py b/scripts/lib/abi/system_symbols.py deleted file mode 100644 index f15c94a6e33c..000000000000 --- a/scripts/lib/abi/system_symbols.py +++ /dev/null @@ -1,378 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=R0902,R0912,R0914,R0915,R1702 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Parse ABI documentation and produce results from it. -""" - -import os -import re -import sys - -from concurrent import futures -from datetime import datetime -from random import shuffle - -from helpers import AbiDebug - -class SystemSymbols: - """Stores arguments for the class and initialize class vars""" - - def graph_add_file(self, path, link=None): - """ - add a file path to the sysfs graph stored at self.root - """ - - if path in self.files: - return - - name = "" - ref = self.root - for edge in path.split("/"): - name += edge + "/" - if edge not in ref: - ref[edge] = {"__name": [name.rstrip("/")]} - - ref = ref[edge] - - if link and link not in ref["__name"]: - ref["__name"].append(link.rstrip("/")) - - self.files.add(path) - - def print_graph(self, root_prefix="", root=None, level=0): - """Prints a reference tree graph using UTF-8 characters""" - - if not root: - root = self.root - level = 0 - - # Prevent endless traverse - if level > 5: - return - - if level > 0: - prefix = "├──" - last_prefix = "└──" - else: - prefix = "" - last_prefix = "" - - items = list(root.items()) - - names = root.get("__name", []) - for k, edge in items: - if k == "__name": - continue - - if not k: - k = "/" - - if len(names) > 1: - k += " links: " + ",".join(names[1:]) - - if edge == items[-1][1]: - print(root_prefix + last_prefix + k) - p = root_prefix - if level > 0: - p += " " - self.print_graph(p, edge, level + 1) - else: - print(root_prefix + prefix + k) - p = root_prefix + "│ " - self.print_graph(p, edge, level + 1) - - def _walk(self, root): - """ - Walk through sysfs to get all devnodes that aren't ignored. - - By default, uses /sys as sysfs mounting point. If another - directory is used, it replaces them to /sys at the patches. - """ - - with os.scandir(root) as obj: - for entry in obj: - path = os.path.join(root, entry.name) - if self.sysfs: - p = path.replace(self.sysfs, "/sys", count=1) - else: - p = path - - if self.re_ignore.search(p): - return - - # Handle link first to avoid directory recursion - if entry.is_symlink(): - real = os.path.realpath(path) - if not self.sysfs: - self.aliases[path] = real - else: - real = real.replace(self.sysfs, "/sys", count=1) - - # Add absfile location to graph if it doesn't exist - if not self.re_ignore.search(real): - # Add link to the graph - self.graph_add_file(real, p) - - elif entry.is_file(): - self.graph_add_file(p) - - elif entry.is_dir(): - self._walk(path) - - def __init__(self, abi, sysfs="/sys", hints=False): - """ - Initialize internal variables and get a list of all files inside - sysfs that can currently be parsed. - - Please notice that there are several entries on sysfs that aren't - documented as ABI. Ignore those. - - The real paths will be stored under self.files. Aliases will be - stored in separate, as self.aliases. - """ - - self.abi = abi - self.log = abi.log - - if sysfs != "/sys": - self.sysfs = sysfs.rstrip("/") - else: - self.sysfs = None - - self.hints = hints - - self.root = {} - self.aliases = {} - self.files = set() - - dont_walk = [ - # Those require root access and aren't documented at ABI - f"^{sysfs}/kernel/debug", - f"^{sysfs}/kernel/tracing", - f"^{sysfs}/fs/pstore", - f"^{sysfs}/fs/bpf", - f"^{sysfs}/fs/fuse", - - # This is not documented at ABI - f"^{sysfs}/module", - - f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI - f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings - "sections|notes", # aren't actually part of ABI - - # kernel-parameters.txt - not easy to parse - "parameters", - ] - - self.re_ignore = re.compile("|".join(dont_walk)) - - print(f"Reading {sysfs} directory contents...", file=sys.stderr) - self._walk(sysfs) - - def check_file(self, refs, found): - """Check missing ABI symbols for a given sysfs file""" - - res_list = [] - - try: - for names in refs: - fname = names[0] - - res = { - "found": False, - "fname": fname, - "msg": "", - } - res_list.append(res) - - re_what = self.abi.get_regexes(fname) - if not re_what: - self.abi.log.warning(f"missing rules for {fname}") - continue - - for name in names: - for r in re_what: - if self.abi.debug & AbiDebug.UNDEFINED: - self.log.debug("check if %s matches '%s'", name, r.pattern) - if r.match(name): - res["found"] = True - if found: - res["msg"] += f" {fname}: regex:\n\t" - continue - - if self.hints and not res["found"]: - res["msg"] += f" {fname} not found. Tested regexes:\n" - for r in re_what: - res["msg"] += " " + r.pattern + "\n" - - except KeyboardInterrupt: - pass - - return res_list - - def _ref_interactor(self, root): - """Recursive function to interact over the sysfs tree""" - - for k, v in root.items(): - if isinstance(v, dict): - yield from self._ref_interactor(v) - - if root == self.root or k == "__name": - continue - - if self.abi.re_string: - fname = v["__name"][0] - if self.abi.re_string.search(fname): - yield v - else: - yield v - - - def get_fileref(self, all_refs, chunk_size): - """Interactor to group refs into chunks""" - - n = 0 - refs = [] - - for ref in all_refs: - refs.append(ref) - - n += 1 - if n >= chunk_size: - yield refs - n = 0 - refs = [] - - yield refs - - def check_undefined_symbols(self, max_workers=None, chunk_size=50, - found=None, dry_run=None): - """Seach ABI for sysfs symbols missing documentation""" - - self.abi.parse_abi() - - if self.abi.debug & AbiDebug.GRAPH: - self.print_graph() - - all_refs = [] - for ref in self._ref_interactor(self.root): - all_refs.append(ref["__name"]) - - if dry_run: - print("Would check", file=sys.stderr) - for ref in all_refs: - print(", ".join(ref)) - - return - - print("Starting to search symbols (it may take several minutes):", - file=sys.stderr) - start = datetime.now() - old_elapsed = None - - # Python doesn't support multithreading due to limitations on its - # global lock (GIL). While Python 3.13 finally made GIL optional, - # there are still issues related to it. Also, we want to have - # backward compatibility with older versions of Python. - # - # So, use instead multiprocess. However, Python is very slow passing - # data from/to multiple processes. Also, it may consume lots of memory - # if the data to be shared is not small. So, we need to group workload - # in chunks that are big enough to generate performance gains while - # not being so big that would cause out-of-memory. - - num_refs = len(all_refs) - print(f"Number of references to parse: {num_refs}", file=sys.stderr) - - if not max_workers: - max_workers = os.cpu_count() - elif max_workers > os.cpu_count(): - max_workers = os.cpu_count() - - max_workers = max(max_workers, 1) - - max_chunk_size = int((num_refs + max_workers - 1) / max_workers) - chunk_size = min(chunk_size, max_chunk_size) - chunk_size = max(1, chunk_size) - - if max_workers > 1: - executor = futures.ProcessPoolExecutor - - # Place references in a random order. This may help improving - # performance, by mixing complex/simple expressions when creating - # chunks - shuffle(all_refs) - else: - # Python has a high overhead with processes. When there's just - # one worker, it is faster to not create a new process. - # Yet, User still deserves to have a progress print. So, use - # python's "thread", which is actually a single process, using - # an internal schedule to switch between tasks. No performance - # gains for non-IO tasks, but still it can be quickly interrupted - # from time to time to display progress. - executor = futures.ThreadPoolExecutor - - not_found = [] - f_list = [] - with executor(max_workers=max_workers) as exe: - for refs in self.get_fileref(all_refs, chunk_size): - if refs: - try: - f_list.append(exe.submit(self.check_file, refs, found)) - - except KeyboardInterrupt: - return - - total = len(f_list) - - if not total: - if self.abi.re_string: - print(f"No ABI symbol matches {self.abi.search_string}") - else: - self.abi.log.warning("No ABI symbols found") - return - - print(f"{len(f_list):6d} jobs queued on {max_workers} workers", - file=sys.stderr) - - while f_list: - try: - t = futures.wait(f_list, timeout=1, - return_when=futures.FIRST_COMPLETED) - - done = t[0] - - for fut in done: - res_list = fut.result() - - for res in res_list: - if not res["found"]: - not_found.append(res["fname"]) - if res["msg"]: - print(res["msg"]) - - f_list.remove(fut) - except KeyboardInterrupt: - return - - except RuntimeError as e: - self.abi.log.warning(f"Future: {e}") - break - - if sys.stderr.isatty(): - elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] - if len(f_list) < total: - elapsed += f" ({total - len(f_list)}/{total} jobs completed). " - if elapsed != old_elapsed: - print(elapsed + "\r", end="", flush=True, - file=sys.stderr) - old_elapsed = elapsed - - elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] - print(elapsed, file=sys.stderr) - - for f in sorted(not_found): - print(f"{f} not found.") diff --git a/scripts/lib/jobserver.py b/scripts/lib/jobserver.py deleted file mode 100755 index a24f30ef4fa8..000000000000 --- a/scripts/lib/jobserver.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0+ -# -# pylint: disable=C0103,C0209 -# -# - -""" -Interacts with the POSIX jobserver during the Kernel build time. - -A "normal" jobserver task, like the one initiated by a make subrocess would do: - - - open read/write file descriptors to communicate with the job server; - - ask for one slot by calling: - claim = os.read(reader, 1) - - when the job finshes, call: - os.write(writer, b"+") # os.write(writer, claim) - -Here, the goal is different: This script aims to get the remaining number -of slots available, using all of them to run a command which handle tasks in -parallel. To to that, it has a loop that ends only after there are no -slots left. It then increments the number by one, in order to allow a -call equivalent to make -j$((claim+1)), e.g. having a parent make creating -$claim child to do the actual work. - -The end goal here is to keep the total number of build tasks under the -limit established by the initial make -j$n_proc call. - -See: - https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -""" - -import errno -import os -import subprocess -import sys - -class JobserverExec: - """ - Claim all slots from make using POSIX Jobserver. - - The main methods here are: - - open(): reserves all slots; - - close(): method returns all used slots back to make; - - run(): executes a command setting PARALLELISM= - """ - - def __init__(self): - """Initialize internal vars""" - self.claim = 0 - self.jobs = b"" - self.reader = None - self.writer = None - self.is_open = False - - def open(self): - """Reserve all available slots to be claimed later on""" - - if self.is_open: - return - - try: - # Fetch the make environment options. - flags = os.environ["MAKEFLAGS"] - # Look for "--jobserver=R,W" - # Note that GNU Make has used --jobserver-fds and --jobserver-auth - # so this handles all of them. - opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] - - # Parse out R,W file descriptor numbers and set them nonblocking. - # If the MAKEFLAGS variable contains multiple instances of the - # --jobserver-auth= option, the last one is relevant. - fds = opts[-1].split("=", 1)[1] - - # Starting with GNU Make 4.4, named pipes are used for reader - # and writer. - # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 - _, _, path = fds.partition("fifo:") - - if path: - self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) - self.writer = os.open(path, os.O_WRONLY) - else: - self.reader, self.writer = [int(x) for x in fds.split(",", 1)] - # Open a private copy of reader to avoid setting nonblocking - # on an unexpecting process with the same reader fd. - self.reader = os.open("/proc/self/fd/%d" % (self.reader), - os.O_RDONLY | os.O_NONBLOCK) - - # Read out as many jobserver slots as possible - while True: - try: - slot = os.read(self.reader, 8) - self.jobs += slot - except (OSError, IOError) as e: - if e.errno == errno.EWOULDBLOCK: - # Stop at the end of the jobserver queue. - break - # If something went wrong, give back the jobs. - if self.jobs: - os.write(self.writer, self.jobs) - raise e - - # Add a bump for our caller's reserveration, since we're just going - # to sit here blocked on our child. - self.claim = len(self.jobs) + 1 - - except (KeyError, IndexError, ValueError, OSError, IOError): - # Any missing environment strings or bad fds should result in just - # not being parallel. - self.claim = None - - self.is_open = True - - def close(self): - """Return all reserved slots to Jobserver""" - - if not self.is_open: - return - - # Return all the reserved slots. - if len(self.jobs): - os.write(self.writer, self.jobs) - - self.is_open = False - - def __enter__(self): - self.open() - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - self.close() - - def run(self, cmd, *args, **pwargs): - """ - Run a command setting PARALLELISM env variable to the number of - available job slots (claim) + 1, e.g. it will reserve claim slots - to do the actual build work, plus one to monitor its children. - """ - self.open() # Ensure that self.claim is set - - # We can only claim parallelism if there was a jobserver (i.e. a - # top-level "-jN" argument) and there were no other failures. Otherwise - # leave out the environment variable and let the child figure out what - # is best. - if self.claim: - os.environ["PARALLELISM"] = str(self.claim) - - return subprocess.call(cmd, *args, **pwargs) diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py deleted file mode 100644 index 1fd8d17edb32..000000000000 --- a/scripts/lib/kdoc/kdoc_files.py +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=R0903,R0913,R0914,R0917 - -""" -Parse lernel-doc tags on multiple kernel source files. -""" - -import argparse -import logging -import os -import re - -from kdoc_parser import KernelDoc -from kdoc_output import OutputFormat - - -class GlobSourceFiles: - """ - Parse C source code file names and directories via an Interactor. - """ - - def __init__(self, srctree=None, valid_extensions=None): - """ - Initialize valid extensions with a tuple. - - If not defined, assume default C extensions (.c and .h) - - It would be possible to use python's glob function, but it is - very slow, and it is not interactive. So, it would wait to read all - directories before actually do something. - - So, let's use our own implementation. - """ - - if not valid_extensions: - self.extensions = (".c", ".h") - else: - self.extensions = valid_extensions - - self.srctree = srctree - - def _parse_dir(self, dirname): - """Internal function to parse files recursively""" - - with os.scandir(dirname) as obj: - for entry in obj: - name = os.path.join(dirname, entry.name) - - if entry.is_dir(follow_symlinks=False): - yield from self._parse_dir(name) - - if not entry.is_file(): - continue - - basename = os.path.basename(name) - - if not basename.endswith(self.extensions): - continue - - yield name - - def parse_files(self, file_list, file_not_found_cb): - """ - Define an interator to parse all source files from file_list, - handling directories if any - """ - - if not file_list: - return - - for fname in file_list: - if self.srctree: - f = os.path.join(self.srctree, fname) - else: - f = fname - - if os.path.isdir(f): - yield from self._parse_dir(f) - elif os.path.isfile(f): - yield f - elif file_not_found_cb: - file_not_found_cb(fname) - - -class KernelFiles(): - """ - Parse kernel-doc tags on multiple kernel source files. - - There are two type of parsers defined here: - - self.parse_file(): parses both kernel-doc markups and - EXPORT_SYMBOL* macros; - - self.process_export_file(): parses only EXPORT_SYMBOL* macros. - """ - - def warning(self, msg): - """Ancillary routine to output a warning and increment error count""" - - self.config.log.warning(msg) - self.errors += 1 - - def error(self, msg): - """Ancillary routine to output an error and increment error count""" - - self.config.log.error(msg) - self.errors += 1 - - def parse_file(self, fname): - """ - Parse a single Kernel source. - """ - - # Prevent parsing the same file twice if results are cached - if fname in self.files: - return - - doc = KernelDoc(self.config, fname) - export_table, entries = doc.parse_kdoc() - - self.export_table[fname] = export_table - - self.files.add(fname) - self.export_files.add(fname) # parse_kdoc() already check exports - - self.results[fname] = entries - - def process_export_file(self, fname): - """ - Parses EXPORT_SYMBOL* macros from a single Kernel source file. - """ - - # Prevent parsing the same file twice if results are cached - if fname in self.export_files: - return - - doc = KernelDoc(self.config, fname) - export_table = doc.parse_export() - - if not export_table: - self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") - export_table = set() - - self.export_table[fname] = export_table - self.export_files.add(fname) - - def file_not_found_cb(self, fname): - """ - Callback to warn if a file was not found. - """ - - self.error(f"Cannot find file {fname}") - - def __init__(self, verbose=False, out_style=None, - werror=False, wreturn=False, wshort_desc=False, - wcontents_before_sections=False, - logger=None): - """ - Initialize startup variables and parse all files - """ - - if not verbose: - verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) - - if out_style is None: - out_style = OutputFormat() - - if not werror: - kcflags = os.environ.get("KCFLAGS", None) - if kcflags: - match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) - if match: - werror = True - - # reading this variable is for backwards compat just in case - # someone was calling it with the variable from outside the - # kernel's build system - kdoc_werror = os.environ.get("KDOC_WERROR", None) - if kdoc_werror: - werror = kdoc_werror - - # Some variables are global to the parser logic as a whole as they are - # used to send control configuration to KernelDoc class. As such, - # those variables are read-only inside the KernelDoc. - self.config = argparse.Namespace - - self.config.verbose = verbose - self.config.werror = werror - self.config.wreturn = wreturn - self.config.wshort_desc = wshort_desc - self.config.wcontents_before_sections = wcontents_before_sections - - if not logger: - self.config.log = logging.getLogger("kernel-doc") - else: - self.config.log = logger - - self.config.warning = self.warning - - self.config.src_tree = os.environ.get("SRCTREE", None) - - # Initialize variables that are internal to KernelFiles - - self.out_style = out_style - - self.errors = 0 - self.results = {} - - self.files = set() - self.export_files = set() - self.export_table = {} - - def parse(self, file_list, export_file=None): - """ - Parse all files - """ - - glob = GlobSourceFiles(srctree=self.config.src_tree) - - for fname in glob.parse_files(file_list, self.file_not_found_cb): - self.parse_file(fname) - - for fname in glob.parse_files(export_file, self.file_not_found_cb): - self.process_export_file(fname) - - def out_msg(self, fname, name, arg): - """ - Return output messages from a file name using the output style - filtering. - - If output type was not handled by the syler, return None. - """ - - # NOTE: we can add rules here to filter out unwanted parts, - # although OutputFormat.msg already does that. - - return self.out_style.msg(fname, name, arg) - - def msg(self, enable_lineno=False, export=False, internal=False, - symbol=None, nosymbol=None, no_doc_sections=False, - filenames=None, export_file=None): - """ - Interacts over the kernel-doc results and output messages, - returning kernel-doc markups on each interaction - """ - - self.out_style.set_config(self.config) - - if not filenames: - filenames = sorted(self.results.keys()) - - glob = GlobSourceFiles(srctree=self.config.src_tree) - - for fname in filenames: - function_table = set() - - if internal or export: - if not export_file: - export_file = [fname] - - for f in glob.parse_files(export_file, self.file_not_found_cb): - function_table |= self.export_table[f] - - if symbol: - for s in symbol: - function_table.add(s) - - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - - msg = "" - if fname not in self.results: - self.config.log.warning("No kernel-doc for file %s", fname) - continue - - symbols = self.results[fname] - self.out_style.set_symbols(symbols) - - for arg in symbols: - m = self.out_msg(fname, arg.name, arg) - - if m is None: - ln = arg.get("ln", 0) - dtype = arg.get('type', "") - - self.config.log.warning("%s:%d Can't handle %s", - fname, ln, dtype) - else: - msg += m - - if msg: - yield fname, msg diff --git a/scripts/lib/kdoc/kdoc_item.py b/scripts/lib/kdoc/kdoc_item.py deleted file mode 100644 index 19805301cb2c..000000000000 --- a/scripts/lib/kdoc/kdoc_item.py +++ /dev/null @@ -1,43 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# A class that will, eventually, encapsulate all of the parsed data that we -# then pass into the output modules. -# - -class KdocItem: - def __init__(self, name, fname, type, start_line, **other_stuff): - self.name = name - self.fname = fname - self.type = type - self.declaration_start_line = start_line - self.sections = {} - self.sections_start_lines = {} - self.parameterlist = [] - self.parameterdesc_start_lines = [] - self.parameterdescs = {} - self.parametertypes = {} - # - # Just save everything else into our own dict so that the output - # side can grab it directly as before. As we move things into more - # structured data, this will, hopefully, fade away. - # - self.other_stuff = other_stuff - - def get(self, key, default = None): - return self.other_stuff.get(key, default) - - def __getitem__(self, key): - return self.get(key) - - # - # Tracking of section and parameter information. - # - def set_sections(self, sections, start_lines): - self.sections = sections - self.section_start_lines = start_lines - - def set_params(self, names, descs, types, starts): - self.parameterlist = names - self.parameterdescs = descs - self.parametertypes = types - self.parameterdesc_start_lines = starts diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py deleted file mode 100644 index 58f115059e93..000000000000 --- a/scripts/lib/kdoc/kdoc_output.py +++ /dev/null @@ -1,824 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 - -""" -Implement output filters to print kernel-doc documentation. - -The implementation uses a virtual base class (OutputFormat) which -contains a dispatches to virtual methods, and some code to filter -out output messages. - -The actual implementation is done on one separate class per each type -of output. Currently, there are output classes for ReST and man/troff. -""" - -import os -import re -from datetime import datetime - -from kdoc_parser import KernelDoc, type_param -from kdoc_re import KernRe - - -function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) - -# match expressions used to find embedded type information -type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) -type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) -type_func = KernRe(r"(\w+)\(\)", cache=False) -type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) - -# Special RST handling for func ptr params -type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) - -# Special RST handling for structs with func ptr params -type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) - -type_env = KernRe(r"(\$\w+)", cache=False) -type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) -type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) -type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) -type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False) -type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) -type_fallback = KernRe(r"\&([_\w]+)", cache=False) -type_member_func = type_member + KernRe(r"\(\)", cache=False) - - -class OutputFormat: - """ - Base class for OutputFormat. If used as-is, it means that only - warnings will be displayed. - """ - - # output mode. - OUTPUT_ALL = 0 # output all symbols and doc sections - OUTPUT_INCLUDE = 1 # output only specified symbols - OUTPUT_EXPORTED = 2 # output exported symbols - OUTPUT_INTERNAL = 3 # output non-exported symbols - - # Virtual member to be overriden at the inherited classes - highlights = [] - - def __init__(self): - """Declare internal vars and set mode to OUTPUT_ALL""" - - self.out_mode = self.OUTPUT_ALL - self.enable_lineno = None - self.nosymbol = {} - self.symbol = None - self.function_table = None - self.config = None - self.no_doc_sections = False - - self.data = "" - - def set_config(self, config): - """ - Setup global config variables used by both parser and output. - """ - - self.config = config - - def set_filter(self, export, internal, symbol, nosymbol, function_table, - enable_lineno, no_doc_sections): - """ - Initialize filter variables according with the requested mode. - - Only one choice is valid between export, internal and symbol. - - The nosymbol filter can be used on all modes. - """ - - self.enable_lineno = enable_lineno - self.no_doc_sections = no_doc_sections - self.function_table = function_table - - if symbol: - self.out_mode = self.OUTPUT_INCLUDE - elif export: - self.out_mode = self.OUTPUT_EXPORTED - elif internal: - self.out_mode = self.OUTPUT_INTERNAL - else: - self.out_mode = self.OUTPUT_ALL - - if nosymbol: - self.nosymbol = set(nosymbol) - - - def highlight_block(self, block): - """ - Apply the RST highlights to a sub-block of text. - """ - - for r, sub in self.highlights: - block = r.sub(sub, block) - - return block - - def out_warnings(self, args): - """ - Output warnings for identifiers that will be displayed. - """ - - for log_msg in args.warnings: - self.config.warning(log_msg) - - def check_doc(self, name, args): - """Check if DOC should be output""" - - if self.no_doc_sections: - return False - - if name in self.nosymbol: - return False - - if self.out_mode == self.OUTPUT_ALL: - self.out_warnings(args) - return True - - if self.out_mode == self.OUTPUT_INCLUDE: - if name in self.function_table: - self.out_warnings(args) - return True - - return False - - def check_declaration(self, dtype, name, args): - """ - Checks if a declaration should be output or not based on the - filtering criteria. - """ - - if name in self.nosymbol: - return False - - if self.out_mode == self.OUTPUT_ALL: - self.out_warnings(args) - return True - - if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: - if name in self.function_table: - return True - - if self.out_mode == self.OUTPUT_INTERNAL: - if dtype != "function": - self.out_warnings(args) - return True - - if name not in self.function_table: - self.out_warnings(args) - return True - - return False - - def msg(self, fname, name, args): - """ - Handles a single entry from kernel-doc parser - """ - - self.data = "" - - dtype = args.type - - if dtype == "doc": - self.out_doc(fname, name, args) - return self.data - - if not self.check_declaration(dtype, name, args): - return self.data - - if dtype == "function": - self.out_function(fname, name, args) - return self.data - - if dtype == "enum": - self.out_enum(fname, name, args) - return self.data - - if dtype == "typedef": - self.out_typedef(fname, name, args) - return self.data - - if dtype in ["struct", "union"]: - self.out_struct(fname, name, args) - return self.data - - # Warn if some type requires an output logic - self.config.log.warning("doesn't now how to output '%s' block", - dtype) - - return None - - # Virtual methods to be overridden by inherited classes - # At the base class, those do nothing. - def set_symbols(self, symbols): - """Get a list of all symbols from kernel_doc""" - - def out_doc(self, fname, name, args): - """Outputs a DOC block""" - - def out_function(self, fname, name, args): - """Outputs a function""" - - def out_enum(self, fname, name, args): - """Outputs an enum""" - - def out_typedef(self, fname, name, args): - """Outputs a typedef""" - - def out_struct(self, fname, name, args): - """Outputs a struct""" - - -class RestFormat(OutputFormat): - """Consts and functions used by ReST output""" - - highlights = [ - (type_constant, r"``\1``"), - (type_constant2, r"``\1``"), - - # Note: need to escape () to avoid func matching later - (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), - (type_member, r":c:type:`\1\2\3 <\1>`"), - (type_fp_param, r"**\1\\(\\)**"), - (type_fp_param2, r"**\1\\(\\)**"), - (type_func, r"\1()"), - (type_enum, r":c:type:`\1 <\2>`"), - (type_struct, r":c:type:`\1 <\2>`"), - (type_typedef, r":c:type:`\1 <\2>`"), - (type_union, r":c:type:`\1 <\2>`"), - - # in rst this can refer to any type - (type_fallback, r":c:type:`\1`"), - (type_param_ref, r"**\1\2**") - ] - blankline = "\n" - - sphinx_literal = KernRe(r'^[^.].*::$', cache=False) - sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) - - def __init__(self): - """ - Creates class variables. - - Not really mandatory, but it is a good coding style and makes - pylint happy. - """ - - super().__init__() - self.lineprefix = "" - - def print_lineno(self, ln): - """Outputs a line number""" - - if self.enable_lineno and ln is not None: - ln += 1 - self.data += f".. LINENO {ln}\n" - - def output_highlight(self, args): - """ - Outputs a C symbol that may require being converted to ReST using - the self.highlights variable - """ - - input_text = args - output = "" - in_literal = False - litprefix = "" - block = "" - - for line in input_text.strip("\n").split("\n"): - - # If we're in a literal block, see if we should drop out of it. - # Otherwise, pass the line straight through unmunged. - if in_literal: - if line.strip(): # If the line is not blank - # If this is the first non-blank line in a literal block, - # figure out the proper indent. - if not litprefix: - r = KernRe(r'^(\s*)') - if r.match(line): - litprefix = '^' + r.group(1) - else: - litprefix = "" - - output += line + "\n" - elif not KernRe(litprefix).match(line): - in_literal = False - else: - output += line + "\n" - else: - output += line + "\n" - - # Not in a literal block (or just dropped out) - if not in_literal: - block += line + "\n" - if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line): - in_literal = True - litprefix = "" - output += self.highlight_block(block) - block = "" - - # Handle any remaining block - if block: - output += self.highlight_block(block) - - # Print the output with the line prefix - for line in output.strip("\n").split("\n"): - self.data += self.lineprefix + line + "\n" - - def out_section(self, args, out_docblock=False): - """ - Outputs a block section. - - This could use some work; it's used to output the DOC: sections, and - starts by putting out the name of the doc section itself, but that - tends to duplicate a header already in the template file. - """ - for section, text in args.sections.items(): - # Skip sections that are in the nosymbol_table - if section in self.nosymbol: - continue - - if out_docblock: - if not self.out_mode == self.OUTPUT_INCLUDE: - self.data += f".. _{section}:\n\n" - self.data += f'{self.lineprefix}**{section}**\n\n' - else: - self.data += f'{self.lineprefix}**{section}**\n\n' - - self.print_lineno(args.section_start_lines.get(section, 0)) - self.output_highlight(text) - self.data += "\n" - self.data += "\n" - - def out_doc(self, fname, name, args): - if not self.check_doc(name, args): - return - self.out_section(args, out_docblock=True) - - def out_function(self, fname, name, args): - - oldprefix = self.lineprefix - signature = "" - - func_macro = args.get('func_macro', False) - if func_macro: - signature = name - else: - if args.get('functiontype'): - signature = args['functiontype'] + " " - signature += name + " (" - - ln = args.declaration_start_line - count = 0 - for parameter in args.parameterlist: - if count != 0: - signature += ", " - count += 1 - dtype = args.parametertypes.get(parameter, "") - - if function_pointer.search(dtype): - signature += function_pointer.group(1) + parameter + function_pointer.group(3) - else: - signature += dtype - - if not func_macro: - signature += ")" - - self.print_lineno(ln) - if args.get('typedef') or not args.get('functiontype'): - self.data += f".. c:macro:: {name}\n\n" - - if args.get('typedef'): - self.data += " **Typedef**: " - self.lineprefix = "" - self.output_highlight(args.get('purpose', "")) - self.data += "\n\n**Syntax**\n\n" - self.data += f" ``{signature}``\n\n" - else: - self.data += f"``{signature}``\n\n" - else: - self.data += f".. c:function:: {signature}\n\n" - - if not args.get('typedef'): - self.print_lineno(ln) - self.lineprefix = " " - self.output_highlight(args.get('purpose', "")) - self.data += "\n" - - # Put descriptive text into a container (HTML
) to help set - # function prototypes apart - self.lineprefix = " " - - if args.parameterlist: - self.data += ".. container:: kernelindent\n\n" - self.data += f"{self.lineprefix}**Parameters**\n\n" - - for parameter in args.parameterlist: - parameter_name = KernRe(r'\[.*').sub('', parameter) - dtype = args.parametertypes.get(parameter, "") - - if dtype: - self.data += f"{self.lineprefix}``{dtype}``\n" - else: - self.data += f"{self.lineprefix}``{parameter}``\n" - - self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) - - self.lineprefix = " " - if parameter_name in args.parameterdescs and \ - args.parameterdescs[parameter_name] != KernelDoc.undescribed: - - self.output_highlight(args.parameterdescs[parameter_name]) - self.data += "\n" - else: - self.data += f"{self.lineprefix}*undescribed*\n\n" - self.lineprefix = " " - - self.out_section(args) - self.lineprefix = oldprefix - - def out_enum(self, fname, name, args): - - oldprefix = self.lineprefix - ln = args.declaration_start_line - - self.data += f"\n\n.. c:enum:: {name}\n\n" - - self.print_lineno(ln) - self.lineprefix = " " - self.output_highlight(args.get('purpose', '')) - self.data += "\n" - - self.data += ".. container:: kernelindent\n\n" - outer = self.lineprefix + " " - self.lineprefix = outer + " " - self.data += f"{outer}**Constants**\n\n" - - for parameter in args.parameterlist: - self.data += f"{outer}``{parameter}``\n" - - if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed: - self.output_highlight(args.parameterdescs[parameter]) - else: - self.data += f"{self.lineprefix}*undescribed*\n\n" - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - def out_typedef(self, fname, name, args): - - oldprefix = self.lineprefix - ln = args.declaration_start_line - - self.data += f"\n\n.. c:type:: {name}\n\n" - - self.print_lineno(ln) - self.lineprefix = " " - - self.output_highlight(args.get('purpose', '')) - - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - def out_struct(self, fname, name, args): - - purpose = args.get('purpose', "") - declaration = args.get('definition', "") - dtype = args.type - ln = args.declaration_start_line - - self.data += f"\n\n.. c:{dtype}:: {name}\n\n" - - self.print_lineno(ln) - - oldprefix = self.lineprefix - self.lineprefix += " " - - self.output_highlight(purpose) - self.data += "\n" - - self.data += ".. container:: kernelindent\n\n" - self.data += f"{self.lineprefix}**Definition**::\n\n" - - self.lineprefix = self.lineprefix + " " - - declaration = declaration.replace("\t", self.lineprefix) - - self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n" - self.data += f"{declaration}{self.lineprefix}" + "};\n\n" - - self.lineprefix = " " - self.data += f"{self.lineprefix}**Members**\n\n" - for parameter in args.parameterlist: - if not parameter or parameter.startswith("#"): - continue - - parameter_name = parameter.split("[", maxsplit=1)[0] - - if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: - continue - - self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) - - self.data += f"{self.lineprefix}``{parameter}``\n" - - self.lineprefix = " " - self.output_highlight(args.parameterdescs[parameter_name]) - self.lineprefix = " " - - self.data += "\n" - - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - -class ManFormat(OutputFormat): - """Consts and functions used by man pages output""" - - highlights = ( - (type_constant, r"\1"), - (type_constant2, r"\1"), - (type_func, r"\\fB\1\\fP"), - (type_enum, r"\\fI\1\\fP"), - (type_struct, r"\\fI\1\\fP"), - (type_typedef, r"\\fI\1\\fP"), - (type_union, r"\\fI\1\\fP"), - (type_param, r"\\fI\1\\fP"), - (type_param_ref, r"\\fI\1\2\\fP"), - (type_member, r"\\fI\1\2\3\\fP"), - (type_fallback, r"\\fI\1\\fP") - ) - blankline = "" - - date_formats = [ - "%a %b %d %H:%M:%S %Z %Y", - "%a %b %d %H:%M:%S %Y", - "%Y-%m-%d", - "%b %d %Y", - "%B %d %Y", - "%m %d %Y", - ] - - def __init__(self, modulename): - """ - Creates class variables. - - Not really mandatory, but it is a good coding style and makes - pylint happy. - """ - - super().__init__() - self.modulename = modulename - self.symbols = [] - - dt = None - tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") - if tstamp: - for fmt in self.date_formats: - try: - dt = datetime.strptime(tstamp, fmt) - break - except ValueError: - pass - - if not dt: - dt = datetime.now() - - self.man_date = dt.strftime("%B %Y") - - def arg_name(self, args, name): - """ - Return the name that will be used for the man page. - - As we may have the same name on different namespaces, - prepend the data type for all types except functions and typedefs. - - The doc section is special: it uses the modulename. - """ - - dtype = args.type - - if dtype == "doc": - return self.modulename - - if dtype in ["function", "typedef"]: - return name - - return f"{dtype} {name}" - - def set_symbols(self, symbols): - """ - Get a list of all symbols from kernel_doc. - - Man pages will uses it to add a SEE ALSO section with other - symbols at the same file. - """ - self.symbols = symbols - - def out_tail(self, fname, name, args): - """Adds a tail for all man pages""" - - # SEE ALSO section - self.data += f'.SH "SEE ALSO"' + "\n.PP\n" - self.data += (f"Kernel file \\fB{args.fname}\\fR\n") - if len(self.symbols) >= 2: - cur_name = self.arg_name(args, name) - - related = [] - for arg in self.symbols: - out_name = self.arg_name(arg, arg.name) - - if cur_name == out_name: - continue - - related.append(f"\\fB{out_name}\\fR(9)") - - self.data += ",\n".join(related) + "\n" - - # TODO: does it make sense to add other sections? Maybe - # REPORTING ISSUES? LICENSE? - - def msg(self, fname, name, args): - """ - Handles a single entry from kernel-doc parser. - - Add a tail at the end of man pages output. - """ - super().msg(fname, name, args) - self.out_tail(fname, name, args) - - return self.data - - def output_highlight(self, block): - """ - Outputs a C symbol that may require being highlighted with - self.highlights variable using troff syntax - """ - - contents = self.highlight_block(block) - - if isinstance(contents, list): - contents = "\n".join(contents) - - for line in contents.strip("\n").split("\n"): - line = KernRe(r"^\s*").sub("", line) - if not line: - continue - - if line[0] == ".": - self.data += "\\&" + line + "\n" - else: - self.data += line + "\n" - - def out_doc(self, fname, name, args): - if not self.check_doc(name, args): - return - - out_name = self.arg_name(args, name) - - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_function(self, fname, name, args): - """output function in man""" - - out_name = self.arg_name(args, name) - - self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"{name} \\- {args['purpose']}\n" - - self.data += ".SH SYNOPSIS\n" - if args.get('functiontype', ''): - self.data += f'.B "{args["functiontype"]}" {name}' + "\n" - else: - self.data += f'.B "{name}' + "\n" - - count = 0 - parenth = "(" - post = "," - - for parameter in args.parameterlist: - if count == len(args.parameterlist) - 1: - post = ");" - - dtype = args.parametertypes.get(parameter, "") - if function_pointer.match(dtype): - # Pointer-to-function - self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" - else: - dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) - - self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" - count += 1 - parenth = "" - - if args.parameterlist: - self.data += ".SH ARGUMENTS\n" - - for parameter in args.parameterlist: - parameter_name = re.sub(r'\[.*', '', parameter) - - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name, "")) - - for section, text in args.sections.items(): - self.data += f'.SH "{section.upper()}"' + "\n" - self.output_highlight(text) - - def out_enum(self, fname, name, args): - out_name = self.arg_name(args, name) - - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"enum {name} \\- {args['purpose']}\n" - - self.data += ".SH SYNOPSIS\n" - self.data += f"enum {name}" + " {\n" - - count = 0 - for parameter in args.parameterlist: - self.data += f'.br\n.BI " {parameter}"' + "\n" - if count == len(args.parameterlist) - 1: - self.data += "\n};\n" - else: - self.data += ", \n.br\n" - - count += 1 - - self.data += ".SH Constants\n" - - for parameter in args.parameterlist: - parameter_name = KernRe(r'\[.*').sub('', parameter) - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name, "")) - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_typedef(self, fname, name, args): - module = self.modulename - purpose = args.get('purpose') - out_name = self.arg_name(args, name) - - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"typedef {name} \\- {purpose}\n" - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_struct(self, fname, name, args): - module = self.modulename - purpose = args.get('purpose') - definition = args.get('definition') - out_name = self.arg_name(args, name) - - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"{args.type} {name} \\- {purpose}\n" - - # Replace tabs with two spaces and handle newlines - declaration = definition.replace("\t", " ") - declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) - - self.data += ".SH SYNOPSIS\n" - self.data += f"{args.type} {name} " + "{" + "\n.br\n" - self.data += f'.BI "{declaration}\n' + "};\n.br\n\n" - - self.data += ".SH Members\n" - for parameter in args.parameterlist: - if parameter.startswith("#"): - continue - - parameter_name = re.sub(r"\[.*", "", parameter) - - if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: - continue - - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name)) - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py deleted file mode 100644 index f7dbb0868367..000000000000 --- a/scripts/lib/kdoc/kdoc_parser.py +++ /dev/null @@ -1,1667 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 - -""" -kdoc_parser -=========== - -Read a C language source or header FILE and extract embedded -documentation comments -""" - -import sys -import re -from pprint import pformat - -from kdoc_re import NestedMatch, KernRe -from kdoc_item import KdocItem - -# -# Regular expressions used to parse kernel-doc markups at KernelDoc class. -# -# Let's declare them in lowercase outside any class to make easier to -# convert from the python script. -# -# As those are evaluated at the beginning, no need to cache them -# - -# Allow whitespace at end of comment start. -doc_start = KernRe(r'^/\*\*\s*$', cache=False) - -doc_end = KernRe(r'\*/', cache=False) -doc_com = KernRe(r'\s*\*\s*', cache=False) -doc_com_body = KernRe(r'\s*\* ?', cache=False) -doc_decl = doc_com + KernRe(r'(\w+)', cache=False) - -# @params and a strictly limited set of supported section names -# Specifically: -# Match @word: -# @...: -# @{section-name}: -# while trying to not match literal block starts like "example::" -# -known_section_names = 'description|context|returns?|notes?|examples?' -known_sections = KernRe(known_section_names, flags = re.I) -doc_sect = doc_com + \ - KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', - flags=re.I, cache=False) - -doc_content = doc_com_body + KernRe(r'(.*)', cache=False) -doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) -doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) -doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) -doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) - -export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) -export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) - -type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) - -# -# Tests for the beginning of a kerneldoc block in its various forms. -# -doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) -doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) -doc_begin_func = KernRe(str(doc_com) + # initial " * ' - r"(?:\w+\s*\*\s*)?" + # type (not captured) - r'(?:define\s+)?' + # possible "define" (not captured) - r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" - r'(?:[-:].*)?$', # description (not captured) - cache = False) - -# -# Here begins a long set of transformations to turn structure member prefixes -# and macro invocations into something we can parse and generate kdoc for. -# -struct_args_pattern = r'([^,)]+)' - -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. - # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), -] -# -# Regexes here are guaranteed to have the end limiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. -# -struct_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), -] - -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), -] - -# -# Apply a set of transforms to a block of text. -# -def apply_transforms(xforms, text): - for search, subst in xforms: - text = search.sub(subst, text) - return text - -# -# A little helper to get rid of excess white space -# -multi_space = KernRe(r'\s\s+') -def trim_whitespace(s): - return multi_space.sub(' ', s.strip()) - -# -# Remove struct/enum members that have been marked "private". -# -def trim_private_members(text): - # - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. - # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. - # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() - -class state: - """ - State machine enums - """ - - # Parser states - NORMAL = 0 # normal code - NAME = 1 # looking for function name - DECLARATION = 2 # We have seen a declaration which might not be done - BODY = 3 # the body of the comment - SPECIAL_SECTION = 4 # doc section ending with a blank line - PROTO = 5 # scanning prototype - DOCBLOCK = 6 # documentation block - INLINE_NAME = 7 # gathering doc outside main block - INLINE_TEXT = 8 # reading the body of inline docs - - name = [ - "NORMAL", - "NAME", - "DECLARATION", - "BODY", - "SPECIAL_SECTION", - "PROTO", - "DOCBLOCK", - "INLINE_NAME", - "INLINE_TEXT", - ] - - -SECTION_DEFAULT = "Description" # default section - -class KernelEntry: - - def __init__(self, config, fname, ln): - self.config = config - self.fname = fname - - self._contents = [] - self.prototype = "" - - self.warnings = [] - - self.parameterlist = [] - self.parameterdescs = {} - self.parametertypes = {} - self.parameterdesc_start_lines = {} - - self.section_start_lines = {} - self.sections = {} - - self.anon_struct_union = False - - self.leading_space = None - - self.fname = fname - - # State flags - self.brcount = 0 - self.declaration_start_line = ln + 1 - - # - # Management of section contents - # - def add_text(self, text): - self._contents.append(text) - - def contents(self): - return '\n'.join(self._contents) + '\n' - - # TODO: rename to emit_message after removal of kernel-doc.pl - def emit_msg(self, ln, msg, *, warning=True): - """Emit a message""" - - log_msg = f"{self.fname}:{ln} {msg}" - - if not warning: - self.config.log.info(log_msg) - return - - # Delegate warning output to output logic, as this way it - # will report warnings/info only for symbols that are output - - self.warnings.append(log_msg) - return - - # - # Begin a new section. - # - def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): - if dump: - self.dump_section(start_new = True) - self.section = title - self.new_start_line = line_no - - def dump_section(self, start_new=True): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - # - # If we have accumulated no contents in the default ("description") - # section, don't bother. - # - if self.section == SECTION_DEFAULT and not self._contents: - return - name = self.section - contents = self.contents() - - if type_param.match(name): - name = type_param.group(1) - - self.parameterdescs[name] = contents - self.parameterdesc_start_lines[name] = self.new_start_line - - self.new_start_line = 0 - - else: - if name in self.sections and self.sections[name] != "": - # Only warn on user-specified duplicate section names - if name != SECTION_DEFAULT: - self.emit_msg(self.new_start_line, - f"duplicate section name '{name}'") - # Treat as a new paragraph - add a blank line - self.sections[name] += '\n' + contents - else: - self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line - self.new_start_line = 0 - -# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) - - if start_new: - self.section = SECTION_DEFAULT - self._contents = [] - -python_warning = False - -class KernelDoc: - """ - Read a C language source or header FILE and extract embedded - documentation comments. - """ - - # Section names - - section_context = "Context" - section_return = "Return" - - undescribed = "-- undescribed --" - - def __init__(self, config, fname): - """Initialize internal variables""" - - self.fname = fname - self.config = config - - # Initial state for the state machines - self.state = state.NORMAL - - # Store entry currently being processed - self.entry = None - - # Place all potential outputs into an array - self.entries = [] - - # - # We need Python 3.7 for its "dicts remember the insertion - # order" guarantee - # - global python_warning - if (not python_warning and - sys.version_info.major == 3 and sys.version_info.minor < 7): - - self.emit_msg(0, - 'Python 3.7 or later is required for correct results') - python_warning = True - - def emit_msg(self, ln, msg, *, warning=True): - """Emit a message""" - - if self.entry: - self.entry.emit_msg(ln, msg, warning=warning) - return - - log_msg = f"{self.fname}:{ln} {msg}" - - if warning: - self.config.log.warning(log_msg) - else: - self.config.log.info(log_msg) - - def dump_section(self, start_new=True): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - - if self.entry: - self.entry.dump_section(start_new) - - # TODO: rename it to store_declaration after removal of kernel-doc.pl - def output_declaration(self, dtype, name, **args): - """ - Stores the entry into an entry array. - - The actual output and output filters will be handled elsewhere - """ - - item = KdocItem(name, self.fname, dtype, - self.entry.declaration_start_line, **args) - item.warnings = self.entry.warnings - - # Drop empty sections - # TODO: improve empty sections logic to emit warnings - sections = self.entry.sections - for section in ["Description", "Return"]: - if section in sections and not sections[section].rstrip(): - del sections[section] - item.set_sections(sections, self.entry.section_start_lines) - item.set_params(self.entry.parameterlist, self.entry.parameterdescs, - self.entry.parametertypes, - self.entry.parameterdesc_start_lines) - self.entries.append(item) - - self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) - - def reset_state(self, ln): - """ - Ancillary routine to create a new entry. It initializes all - variables used by the state machine. - """ - - # - # Flush the warnings out before we proceed further - # - if self.entry and self.entry not in self.entries: - for log_msg in self.entry.warnings: - self.config.log.warning(log_msg) - - self.entry = KernelEntry(self.config, self.fname, ln) - - # State flags - self.state = state.NORMAL - - def push_parameter(self, ln, decl_type, param, dtype, - org_arg, declaration_name): - """ - Store parameters and their descriptions at self.entry. - """ - - if self.entry.anon_struct_union and dtype == "" and param == "}": - return # Ignore the ending }; from anonymous struct/union - - self.entry.anon_struct_union = False - - param = KernRe(r'[\[\)].*').sub('', param, count=1) - - # - # Look at various "anonymous type" cases. - # - if dtype == '': - if param.endswith("..."): - if len(param) > 3: # there is a name provided, use that - param = param[:-3] - if not self.entry.parameterdescs.get(param): - self.entry.parameterdescs[param] = "variable arguments" - - elif (not param) or param == "void": - param = "void" - self.entry.parameterdescs[param] = "no arguments" - - elif param in ["struct", "union"]: - # Handle unnamed (anonymous) union or struct - dtype = param - param = "{unnamed_" + param + "}" - self.entry.parameterdescs[param] = "anonymous\n" - self.entry.anon_struct_union = True - - # Warn if parameter has no description - # (but ignore ones starting with # as these are not parameters - # but inline preprocessor statements) - if param not in self.entry.parameterdescs and not param.startswith("#"): - self.entry.parameterdescs[param] = self.undescribed - - if "." not in param: - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - - self.emit_msg(ln, - f"{dname} '{param}' not described in '{declaration_name}'") - - # Strip spaces from param so that it is one continuous string on - # parameterlist. This fixes a problem where check_sections() - # cannot find a parameter like "addr[6 + 2]" because it actually - # appears as "addr[6", "+", "2]" on the parameter list. - # However, it's better to maintain the param string unchanged for - # output, so just weaken the string compare in check_sections() - # to ignore "[blah" in a parameter string. - - self.entry.parameterlist.append(param) - org_arg = KernRe(r'\s\s+').sub(' ', org_arg) - self.entry.parametertypes[param] = org_arg - - - def create_parameter_list(self, ln, decl_type, args, - splitter, declaration_name): - """ - Creates a list of parameters, storing them at self.entry. - """ - - # temporarily replace all commas inside function pointer definition - arg_expr = KernRe(r'(\([^\),]+),') - while arg_expr.search(args): - args = arg_expr.sub(r"\1#", args) - - for arg in args.split(splitter): - # Ignore argument attributes - arg = KernRe(r'\sPOS0?\s').sub(' ', arg) - - # Strip leading/trailing spaces - arg = arg.strip() - arg = KernRe(r'\s+').sub(' ', arg, count=1) - - if arg.startswith('#'): - # Treat preprocessor directive as a typeless variable just to fill - # corresponding data structures "correctly". Catch it later in - # output_* subs. - - # Treat preprocessor directive as a typeless variable - self.push_parameter(ln, decl_type, arg, "", - "", declaration_name) - # - # The pointer-to-function case. - # - elif KernRe(r'\(.+\)\s*\(').search(arg): - arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" - r'([\w\[\].]*)' # Capture the name and possible [array] - r'\s*\)') # Make sure the trailing ")" is there - if r.match(arg): - param = r.group(1) - else: - self.emit_msg(ln, f"Invalid param: {arg}") - param = arg - dtype = arg.replace(param, '') - self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - # - # The array-of-pointers case. Dig the parameter name out from the middle - # of the declaration. - # - elif KernRe(r'\(.+\)\s*\[').search(arg): - r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" - r'([\w.]*?)' # The actual pointer name - r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] - if r.match(arg): - param = r.group(1) - else: - self.emit_msg(ln, f"Invalid param: {arg}") - param = arg - dtype = arg.replace(param, '') - self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - elif arg: - # - # Clean up extraneous spaces and split the string at commas; the first - # element of the resulting list will also include the type information. - # - arg = KernRe(r'\s*:\s*').sub(":", arg) - arg = KernRe(r'\s*\[').sub('[', arg) - args = KernRe(r'\s*,\s*').split(arg) - args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) - # - # args[0] has a string of "type a". If "a" includes an [array] - # declaration, we want to not be fooled by any white space inside - # the brackets, so detect and handle that case specially. - # - r = KernRe(r'^([^[\]]*\s+)(.*)$') - if r.match(args[0]): - args[0] = r.group(2) - dtype = r.group(1) - else: - # No space in args[0]; this seems wrong but preserves previous behavior - dtype = '' - - bitfield_re = KernRe(r'(.*?):(\w+)') - for param in args: - # - # For pointers, shift the star(s) from the variable name to the - # type declaration. - # - r = KernRe(r'^(\*+)\s*(.*)') - if r.match(param): - self.push_parameter(ln, decl_type, r.group(2), - f"{dtype} {r.group(1)}", - arg, declaration_name) - # - # Perform a similar shift for bitfields. - # - elif bitfield_re.search(param): - if dtype != "": # Skip unnamed bit-fields - self.push_parameter(ln, decl_type, bitfield_re.group(1), - f"{dtype}:{bitfield_re.group(2)}", - arg, declaration_name) - else: - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - def check_sections(self, ln, decl_name, decl_type): - """ - Check for errors inside sections, emitting warnings if not found - parameters are described. - """ - for section in self.entry.sections: - if section not in self.entry.parameterlist and \ - not known_sections.search(section): - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - self.emit_msg(ln, - f"Excess {dname} '{section}' description in '{decl_name}'") - - def check_return_section(self, ln, declaration_name, return_type): - """ - If the function doesn't return void, warns about the lack of a - return description. - """ - - if not self.config.wreturn: - return - - # Ignore an empty return type (It's a macro) - # Ignore functions with a "void" return type (but not "void *") - if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): - return - - if not self.entry.sections.get("Return", None): - self.emit_msg(ln, - f"No description found for return value of '{declaration_name}'") - - # - # Split apart a structure prototype; returns (struct|union, name, members) or None - # - def split_struct_proto(self, proto): - type_pattern = r'(struct|union)' - qualifiers = [ - "__attribute__", - "__packed", - "__aligned", - "____cacheline_aligned_in_smp", - "____cacheline_aligned", - ] - definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - - r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) - if r.search(proto): - return (r.group(1), r.group(2), r.group(3)) - else: - r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') - if r.search(proto): - return (r.group(1), r.group(3), r.group(2)) - return None - # - # Rewrite the members of a structure or union for easier formatting later on. - # Among other things, this function will turn a member like: - # - # struct { inner_members; } foo; - # - # into: - # - # struct foo; inner_members; - # - def rewrite_struct_members(self, members): - # - # Process struct/union members from the most deeply nested outward. The - # trick is in the ^{ below - it prevents a match of an outer struct/union - # until the inner one has been munged (removing the "{" in the process). - # - struct_members = KernRe(r'(struct|union)' # 0: declaration type - r'([^\{\};]+)' # 1: possible name - r'(\{)' - r'([^\{\}]*)' # 3: Contents of declaration - r'(\})' - r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration - tuples = struct_members.findall(members) - while tuples: - for t in tuples: - newmember = "" - oldmember = "".join(t) # Reconstruct the original formatting - dtype, name, lbr, content, rbr, rest, semi = t - # - # Pass through each field name, normalizing the form and formatting. - # - for s_id in rest.split(','): - s_id = s_id.strip() - newmember += f"{dtype} {s_id}; " - # - # Remove bitfield/array/pointer info, getting the bare name. - # - s_id = KernRe(r'[:\[].*').sub('', s_id) - s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - # - # Pass through the members of this inner structure/union. - # - for arg in content.split(';'): - arg = arg.strip() - # - # Look for (type)(*name)(args) - pointer to function - # - r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') - if r.match(arg): - dtype, name, extra = r.group(1), r.group(2), r.group(3) - # Pointer-to-function - if not s_id: - # Anonymous struct/union - newmember += f"{dtype}{name}{extra}; " - else: - newmember += f"{dtype}{s_id}.{name}{extra}; " - # - # Otherwise a non-function member. - # - else: - # - # Remove bitmap and array portions and spaces around commas - # - arg = KernRe(r':\s*\d+\s*').sub('', arg) - arg = KernRe(r'\[.*\]').sub('', arg) - arg = KernRe(r'\s*,\s*').sub(',', arg) - # - # Look for a normal decl - "type name[,name...]" - # - r = KernRe(r'(.*)\s+([\S+,]+)') - if r.search(arg): - for name in r.group(2).split(','): - name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) - if not s_id: - # Anonymous struct/union - newmember += f"{r.group(1)} {name}; " - else: - newmember += f"{r.group(1)} {s_id}.{name}; " - else: - newmember += f"{arg}; " - # - # At the end of the s_id loop, replace the original declaration with - # the munged version. - # - members = members.replace(oldmember, newmember) - # - # End of the tuple loop - search again and see if there are outer members - # that now turn up. - # - tuples = struct_members.findall(members) - return members - - # - # Format the struct declaration into a standard form for inclusion in the - # resulting docs. - # - def format_struct_decl(self, declaration): - # - # Insert newlines, get rid of extra spaces. - # - declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) - declaration = KernRe(r'\}\s+;').sub('};', declaration) - # - # Format inline enums with each member on its own line. - # - r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') - while r.search(declaration): - declaration = r.sub(r'\1,\n\2', declaration) - # - # Now go through and supply the right number of tabs - # for each line. - # - def_args = declaration.split('\n') - level = 1 - declaration = "" - for clause in def_args: - clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) - if clause: - if '}' in clause and level > 1: - level -= 1 - if not clause.startswith('#'): - declaration += "\t" * level - declaration += "\t" + clause + "\n" - if "{" in clause and "}" not in clause: - level += 1 - return declaration - - - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - # - # Do the basic parse to get the pieces of the declaration. - # - struct_parts = self.split_struct_proto(proto) - if not struct_parts: - self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") - return - decl_type, declaration_name, members = struct_parts - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " - f"Prototype was for {decl_type} {declaration_name} instead\n") - return - # - # Go through the list of members applying all of our transformations. - # - members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) - - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) - # - # Deal with embedded struct and union members, and drop enums entirely. - # - declaration = members - members = self.rewrite_struct_members(members) - members = re.sub(r'(\{[^\{\}]*\})', '', members) - # - # Output the result and we are done. - # - self.create_parameter_list(ln, decl_type, members, ';', - declaration_name) - self.check_sections(ln, declaration_name, decl_type) - self.output_declaration(decl_type, declaration_name, - definition=self.format_struct_decl(declaration), - purpose=self.entry.declaration_purpose) - - def dump_enum(self, ln, proto): - """ - Stores an enum inside self.entries array. - """ - # - # Strip preprocessor directives. Note that this depends on the - # trailing semicolon we added in process_proto_type(). - # - proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) - # - # Parse out the name and members of the enum. Typedef form first. - # - r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') - if r.search(proto): - declaration_name = r.group(2) - members = trim_private_members(r.group(1)) - # - # Failing that, look for a straight enum - # - else: - r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') - if r.match(proto): - declaration_name = r.group(1) - members = trim_private_members(r.group(2)) - # - # OK, this isn't going to work. - # - else: - self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") - return - # - # Make sure we found what we were expecting. - # - if self.entry.identifier != declaration_name: - if self.entry.identifier == "": - self.emit_msg(ln, - f"{proto}: wrong kernel-doc identifier on prototype") - else: - self.emit_msg(ln, - f"expecting prototype for enum {self.entry.identifier}. " - f"Prototype was for enum {declaration_name} instead") - return - - if not declaration_name: - declaration_name = "(anonymous)" - # - # Parse out the name of each enum member, and verify that we - # have a description for it. - # - member_set = set() - members = KernRe(r'\([^;)]*\)').sub('', members) - for arg in members.split(','): - if not arg: - continue - arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) - self.entry.parameterlist.append(arg) - if arg not in self.entry.parameterdescs: - self.entry.parameterdescs[arg] = self.undescribed - self.emit_msg(ln, - f"Enum value '{arg}' not described in enum '{declaration_name}'") - member_set.add(arg) - # - # Ensure that every described member actually exists in the enum. - # - for k in self.entry.parameterdescs: - if k not in member_set: - self.emit_msg(ln, - f"Excess enum value '%{k}' description in '{declaration_name}'") - - self.output_declaration('enum', declaration_name, - purpose=self.entry.declaration_purpose) - - def dump_declaration(self, ln, prototype): - """ - Stores a data declaration inside self.entries array. - """ - - if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) - elif self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) - elif self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) - else: - # This would be a bug - self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') - - def dump_function(self, ln, prototype): - """ - Stores a function of function macro inside self.entries array. - """ - - found = func_macro = False - return_type = '' - decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) - # - # If we have a macro, remove the "#define" at the front. - # - new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) - if new_proto != prototype: - prototype = new_proto - # - # Dispense with the simple "#define A B" case here; the key - # is the space after the name of the symbol being defined. - # NOTE that the seemingly misnamed "func_macro" indicates a - # macro *without* arguments. - # - r = KernRe(r'^(\w+)\s+') - if r.search(prototype): - return_type = '' - declaration_name = r.group(1) - func_macro = True - found = True - - # Yes, this truly is vile. We are looking for: - # 1. Return type (may be nothing if we're looking at a macro) - # 2. Function name - # 3. Function parameters. - # - # All the while we have to watch out for function pointer parameters - # (which IIRC is what the two sections are for), C types (these - # regexps don't even start to express all the possibilities), and - # so on. - # - # If you mess with these regexps, it's a good idea to check that - # the following functions' documentation still comes out right: - # - parport_register_device (function pointer parameters) - # - atomic_set (macro) - # - pci_match_device, __copy_to_user (long return type) - - name = r'\w+' - type1 = r'(?:[\w\s]+)?' - type2 = r'(?:[\w\s]+\*+)+' - # - # Attempt to match first on (args) with no internal parentheses; this - # lets us easily filter out __acquires() and other post-args stuff. If - # that fails, just grab the rest of the line to the last closing - # parenthesis. - # - proto_args = r'\(([^\(]*|.*)\)' - # - # (Except for the simple macro case) attempt to split up the prototype - # in the various ways we understand. - # - if not found: - patterns = [ - rf'^()({name})\s*{proto_args}', - rf'^({type1})\s+({name})\s*{proto_args}', - rf'^({type2})\s*({name})\s*{proto_args}', - ] - - for p in patterns: - r = KernRe(p) - if r.match(prototype): - return_type = r.group(1) - declaration_name = r.group(2) - args = r.group(3) - self.create_parameter_list(ln, decl_type, args, ',', - declaration_name) - found = True - break - # - # Parsing done; make sure that things are as we expect. - # - if not found: - self.emit_msg(ln, - f"cannot understand function prototype: '{prototype}'") - return - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " - f"Prototype was for {declaration_name}() instead") - return - self.check_sections(ln, declaration_name, "function") - self.check_return_section(ln, declaration_name, return_type) - # - # Store the result. - # - self.output_declaration(decl_type, declaration_name, - typedef=('typedef' in return_type), - functiontype=return_type, - purpose=self.entry.declaration_purpose, - func_macro=func_macro) - - - def dump_typedef(self, ln, proto): - """ - Stores a typedef inside self.entries array. - """ - # - # We start by looking for function typedefs. - # - typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' - typedef_ident = r'\*?\s*(\w\S+)\s*' - typedef_args = r'\s*\((.*)\);' - - typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) - typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) - - # Parse function typedef prototypes - for r in [typedef1, typedef2]: - if not r.match(proto): - continue - - return_type = r.group(1).strip() - declaration_name = r.group(2) - args = r.group(3) - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") - return - - self.create_parameter_list(ln, 'function', args, ',', declaration_name) - - self.output_declaration('function', declaration_name, - typedef=True, - functiontype=return_type, - purpose=self.entry.declaration_purpose) - return - # - # Not a function, try to parse a simple typedef. - # - r = KernRe(r'typedef.*\s+(\w+)\s*;') - if r.match(proto): - declaration_name = r.group(1) - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") - return - - self.output_declaration('typedef', declaration_name, - purpose=self.entry.declaration_purpose) - return - - self.emit_msg(ln, "error: Cannot parse typedef!") - - @staticmethod - def process_export(function_set, line): - """ - process EXPORT_SYMBOL* tags - - This method doesn't use any variable from the class, so declare it - with a staticmethod decorator. - """ - - # We support documenting some exported symbols with different - # names. A horrible hack. - suffixes = [ '_noprof' ] - - # Note: it accepts only one EXPORT_SYMBOL* per line, as having - # multiple export lines would violate Kernel coding style. - - if export_symbol.search(line): - symbol = export_symbol.group(2) - elif export_symbol_ns.search(line): - symbol = export_symbol_ns.group(2) - else: - return False - # - # Found an export, trim out any special suffixes - # - for suffix in suffixes: - # Be backward compatible with Python < 3.9 - if symbol.endswith(suffix): - symbol = symbol[:-len(suffix)] - function_set.add(symbol) - return True - - def process_normal(self, ln, line): - """ - STATE_NORMAL: looking for the /** to begin everything. - """ - - if not doc_start.match(line): - return - - # start a new entry - self.reset_state(ln) - - # next line is always the function name - self.state = state.NAME - - def process_name(self, ln, line): - """ - STATE_NAME: Looking for the "name - description" line - """ - # - # Check for a DOC: block and handle them specially. - # - if doc_block.search(line): - - if not doc_block.group(1): - self.entry.begin_section(ln, "Introduction") - else: - self.entry.begin_section(ln, doc_block.group(1)) - - self.entry.identifier = self.entry.section - self.state = state.DOCBLOCK - # - # Otherwise we're looking for a normal kerneldoc declaration line. - # - elif doc_decl.search(line): - self.entry.identifier = doc_decl.group(1) - - # Test for data declaration - if doc_begin_data.search(line): - self.entry.decl_type = doc_begin_data.group(1) - self.entry.identifier = doc_begin_data.group(2) - # - # Look for a function description - # - elif doc_begin_func.search(line): - self.entry.identifier = doc_begin_func.group(1) - self.entry.decl_type = "function" - # - # We struck out. - # - else: - self.emit_msg(ln, - f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") - self.state = state.NORMAL - return - # - # OK, set up for a new kerneldoc entry. - # - self.state = state.BODY - self.entry.identifier = self.entry.identifier.strip(" ") - # if there's no @param blocks need to set up default section here - self.entry.begin_section(ln + 1) - # - # Find the description portion, which *should* be there but - # isn't always. - # (We should be able to capture this from the previous parsing - someday) - # - r = KernRe("[-:](.*)") - if r.search(line): - self.entry.declaration_purpose = trim_whitespace(r.group(1)) - self.state = state.DECLARATION - else: - self.entry.declaration_purpose = "" - - if not self.entry.declaration_purpose and self.config.wshort_desc: - self.emit_msg(ln, - f"missing initial short description on line:\n{line}") - - if not self.entry.identifier and self.entry.decl_type != "enum": - self.emit_msg(ln, - f"wrong kernel-doc identifier on line:\n{line}") - self.state = state.NORMAL - - if self.config.verbose: - self.emit_msg(ln, - f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", - warning=False) - # - # Failed to find an identifier. Emit a warning - # - else: - self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") - - # - # Helper function to determine if a new section is being started. - # - def is_new_section(self, ln, line): - if doc_sect.search(line): - self.state = state.BODY - # - # Pick out the name of our new section, tweaking it if need be. - # - newsection = doc_sect.group(1) - if newsection.lower() == 'description': - newsection = 'Description' - elif newsection.lower() == 'context': - newsection = 'Context' - self.state = state.SPECIAL_SECTION - elif newsection.lower() in ["@return", "@returns", - "return", "returns"]: - newsection = "Return" - self.state = state.SPECIAL_SECTION - elif newsection[0] == '@': - self.state = state.SPECIAL_SECTION - # - # Initialize the contents, and get the new section going. - # - newcontents = doc_sect.group(2) - if not newcontents: - newcontents = "" - self.dump_section() - self.entry.begin_section(ln, newsection) - self.entry.leading_space = None - - self.entry.add_text(newcontents.lstrip()) - return True - return False - - # - # Helper function to detect (and effect) the end of a kerneldoc comment. - # - def is_comment_end(self, ln, line): - if doc_end.search(line): - self.dump_section() - - # Look for doc_com + + doc_end: - r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') - if r.match(line): - self.emit_msg(ln, f"suspicious ending line: {line}") - - self.entry.prototype = "" - self.entry.new_start_line = ln + 1 - - self.state = state.PROTO - return True - return False - - - def process_decl(self, ln, line): - """ - STATE_DECLARATION: We've seen the beginning of a declaration - """ - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - # - # Look for anything with the " * " line beginning. - # - if doc_content.search(line): - cont = doc_content.group(1) - # - # A blank line means that we have moved out of the declaration - # part of the comment (without any "special section" parameter - # descriptions). - # - if cont == "": - self.state = state.BODY - # - # Otherwise we have more of the declaration section to soak up. - # - else: - self.entry.declaration_purpose = \ - trim_whitespace(self.entry.declaration_purpose + ' ' + cont) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - - def process_special(self, ln, line): - """ - STATE_SPECIAL_SECTION: a section ending with a blank line - """ - # - # If we have hit a blank line (only the " * " marker), then this - # section is done. - # - if KernRe(r"\s*\*\s*$").match(line): - self.entry.begin_section(ln, dump = True) - self.state = state.BODY - return - # - # Not a blank line, look for the other ways to end the section. - # - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - # - # OK, we should have a continuation of the text for this section. - # - if doc_content.search(line): - cont = doc_content.group(1) - # - # If the lines of text after the first in a special section have - # leading white space, we need to trim it out or Sphinx will get - # confused. For the second line (the None case), see what we - # find there and remember it. - # - if self.entry.leading_space is None: - r = KernRe(r'^(\s+)') - if r.match(cont): - self.entry.leading_space = len(r.group(1)) - else: - self.entry.leading_space = 0 - # - # Otherwise, before trimming any leading chars, be *sure* - # that they are white space. We should maybe warn if this - # isn't the case. - # - for i in range(0, self.entry.leading_space): - if cont[i] != " ": - self.entry.leading_space = i - break - # - # Add the trimmed result to the section and we're done. - # - self.entry.add_text(cont[self.entry.leading_space:]) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - def process_body(self, ln, line): - """ - STATE_BODY: the bulk of a kerneldoc comment. - """ - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - - if doc_content.search(line): - cont = doc_content.group(1) - self.entry.add_text(cont) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - def process_inline_name(self, ln, line): - """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" - - if doc_inline_sect.search(line): - self.entry.begin_section(ln, doc_inline_sect.group(1)) - self.entry.add_text(doc_inline_sect.group(2).lstrip()) - self.state = state.INLINE_TEXT - elif doc_inline_end.search(line): - self.dump_section() - self.state = state.PROTO - elif doc_content.search(line): - self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") - self.state = state.PROTO - # else ... ?? - - def process_inline_text(self, ln, line): - """STATE_INLINE_TEXT: docbook comments within a prototype.""" - - if doc_inline_end.search(line): - self.dump_section() - self.state = state.PROTO - elif doc_content.search(line): - self.entry.add_text(doc_content.group(1)) - # else ... ?? - - def syscall_munge(self, ln, proto): # pylint: disable=W0613 - """ - Handle syscall definitions - """ - - is_void = False - - # Strip newlines/CR's - proto = re.sub(r'[\r\n]+', ' ', proto) - - # Check if it's a SYSCALL_DEFINE0 - if 'SYSCALL_DEFINE0' in proto: - is_void = True - - # Replace SYSCALL_DEFINE with correct return type & function name - proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) - - r = KernRe(r'long\s+(sys_.*?),') - if r.search(proto): - proto = KernRe(',').sub('(', proto, count=1) - elif is_void: - proto = KernRe(r'\)').sub('(void)', proto, count=1) - - # Now delete all of the odd-numbered commas in the proto - # so that argument types & names don't have a comma between them - count = 0 - length = len(proto) - - if is_void: - length = 0 # skip the loop if is_void - - for ix in range(length): - if proto[ix] == ',': - count += 1 - if count % 2 == 1: - proto = proto[:ix] + ' ' + proto[ix + 1:] - - return proto - - def tracepoint_munge(self, ln, proto): - """ - Handle tracepoint definitions - """ - - tracepointname = None - tracepointargs = None - - # Match tracepoint name based on different patterns - r = KernRe(r'TRACE_EVENT\((.*?),') - if r.search(proto): - tracepointname = r.group(1) - - r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') - if r.search(proto): - tracepointname = r.group(1) - - r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') - if r.search(proto): - tracepointname = r.group(2) - - if tracepointname: - tracepointname = tracepointname.lstrip() - - r = KernRe(r'TP_PROTO\((.*?)\)') - if r.search(proto): - tracepointargs = r.group(1) - - if not tracepointname or not tracepointargs: - self.emit_msg(ln, - f"Unrecognized tracepoint format:\n{proto}\n") - else: - proto = f"static inline void trace_{tracepointname}({tracepointargs})" - self.entry.identifier = f"trace_{self.entry.identifier}" - - return proto - - def process_proto_function(self, ln, line): - """Ancillary routine to process a function prototype""" - - # strip C99-style comments to end of line - line = KernRe(r"//.*$", re.S).sub('', line) - # - # Soak up the line's worth of prototype text, stopping at { or ; if present. - # - if KernRe(r'\s*#\s*define').match(line): - self.entry.prototype = line - elif not line.startswith('#'): # skip other preprocessor stuff - r = KernRe(r'([^\{]*)') - if r.match(line): - self.entry.prototype += r.group(1) + " " - # - # If we now have the whole prototype, clean it up and declare victory. - # - if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): - # strip comments and surrounding spaces - self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() - # - # Handle self.entry.prototypes for function pointers like: - # int (*pcs_config)(struct foo) - # by turning it into - # int pcs_config(struct foo) - # - r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') - self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) - # - # Handle special declaration syntaxes - # - if 'SYSCALL_DEFINE' in self.entry.prototype: - self.entry.prototype = self.syscall_munge(ln, - self.entry.prototype) - else: - r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') - if r.search(self.entry.prototype): - self.entry.prototype = self.tracepoint_munge(ln, - self.entry.prototype) - # - # ... and we're done - # - self.dump_function(ln, self.entry.prototype) - self.reset_state(ln) - - def process_proto_type(self, ln, line): - """Ancillary routine to process a type""" - - # Strip C99-style comments and surrounding whitespace - line = KernRe(r"//.*$", re.S).sub('', line).strip() - if not line: - return # nothing to see here - - # To distinguish preprocessor directive from regular declaration later. - if line.startswith('#'): - line += ";" - # - # Split the declaration on any of { } or ;, and accumulate pieces - # until we hit a semicolon while not inside {brackets} - # - r = KernRe(r'(.*?)([{};])') - for chunk in r.split(line): - if chunk: # Ignore empty matches - self.entry.prototype += chunk - # - # This cries out for a match statement ... someday after we can - # drop Python 3.9 ... - # - if chunk == '{': - self.entry.brcount += 1 - elif chunk == '}': - self.entry.brcount -= 1 - elif chunk == ';' and self.entry.brcount <= 0: - self.dump_declaration(ln, self.entry.prototype) - self.reset_state(ln) - return - # - # We hit the end of the line while still in the declaration; put - # in a space to represent the newline. - # - self.entry.prototype += ' ' - - def process_proto(self, ln, line): - """STATE_PROTO: reading a function/whatever prototype.""" - - if doc_inline_oneline.search(line): - self.entry.begin_section(ln, doc_inline_oneline.group(1)) - self.entry.add_text(doc_inline_oneline.group(2)) - self.dump_section() - - elif doc_inline_start.search(line): - self.state = state.INLINE_NAME - - elif self.entry.decl_type == 'function': - self.process_proto_function(ln, line) - - else: - self.process_proto_type(ln, line) - - def process_docblock(self, ln, line): - """STATE_DOCBLOCK: within a DOC: block.""" - - if doc_end.search(line): - self.dump_section() - self.output_declaration("doc", self.entry.identifier) - self.reset_state(ln) - - elif doc_content.search(line): - self.entry.add_text(doc_content.group(1)) - - def parse_export(self): - """ - Parses EXPORT_SYMBOL* macros from a single Kernel source file. - """ - - export_table = set() - - try: - with open(self.fname, "r", encoding="utf8", - errors="backslashreplace") as fp: - - for line in fp: - self.process_export(export_table, line) - - except IOError: - return None - - return export_table - - # - # The state/action table telling us which function to invoke in - # each state. - # - state_actions = { - state.NORMAL: process_normal, - state.NAME: process_name, - state.BODY: process_body, - state.DECLARATION: process_decl, - state.SPECIAL_SECTION: process_special, - state.INLINE_NAME: process_inline_name, - state.INLINE_TEXT: process_inline_text, - state.PROTO: process_proto, - state.DOCBLOCK: process_docblock, - } - - def parse_kdoc(self): - """ - Open and process each line of a C source file. - The parsing is controlled via a state machine, and the line is passed - to a different process function depending on the state. The process - function may update the state as needed. - - Besides parsing kernel-doc tags, it also parses export symbols. - """ - - prev = "" - prev_ln = None - export_table = set() - - try: - with open(self.fname, "r", encoding="utf8", - errors="backslashreplace") as fp: - for ln, line in enumerate(fp): - - line = line.expandtabs().strip("\n") - - # Group continuation lines on prototypes - if self.state == state.PROTO: - if line.endswith("\\"): - prev += line.rstrip("\\") - if not prev_ln: - prev_ln = ln - continue - - if prev: - ln = prev_ln - line = prev + line - prev = "" - prev_ln = None - - self.config.log.debug("%d %s: %s", - ln, state.name[self.state], - line) - - # This is an optimization over the original script. - # There, when export_file was used for the same file, - # it was read twice. Here, we use the already-existing - # loop to parse exported symbols as well. - # - if (self.state != state.NORMAL) or \ - not self.process_export(export_table, line): - # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) - - except OSError: - self.config.log.error(f"Error: Cannot open file {self.fname}") - - return export_table, self.entries diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py deleted file mode 100644 index 612223e1e723..000000000000 --- a/scripts/lib/kdoc/kdoc_re.py +++ /dev/null @@ -1,270 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . - -""" -Regular expression ancillary classes. - -Those help caching regular expressions and do matching for kernel-doc. -""" - -import re - -# Local cache for regular expressions -re_cache = {} - - -class KernRe: - """ - Helper class to simplify regex declaration and usage, - - It calls re.compile for a given pattern. It also allows adding - regular expressions and define sub at class init time. - - Regular expressions can be cached via an argument, helping to speedup - searches. - """ - - def _add_regex(self, string, flags): - """ - Adds a new regex or re-use it from the cache. - """ - self.regex = re_cache.get(string, None) - if not self.regex: - self.regex = re.compile(string, flags=flags) - if self.cache: - re_cache[string] = self.regex - - def __init__(self, string, cache=True, flags=0): - """ - Compile a regular expression and initialize internal vars. - """ - - self.cache = cache - self.last_match = None - - self._add_regex(string, flags) - - def __str__(self): - """ - Return the regular expression pattern. - """ - return self.regex.pattern - - def __add__(self, other): - """ - Allows adding two regular expressions into one. - """ - - return KernRe(str(self) + str(other), cache=self.cache or other.cache, - flags=self.regex.flags | other.regex.flags) - - def match(self, string): - """ - Handles a re.match storing its results - """ - - self.last_match = self.regex.match(string) - return self.last_match - - def search(self, string): - """ - Handles a re.search storing its results - """ - - self.last_match = self.regex.search(string) - return self.last_match - - def findall(self, string): - """ - Alias to re.findall - """ - - return self.regex.findall(string) - - def split(self, string): - """ - Alias to re.split - """ - - return self.regex.split(string) - - def sub(self, sub, string, count=0): - """ - Alias to re.sub - """ - - return self.regex.sub(sub, string, count=count) - - def group(self, num): - """ - Returns the group results of the last match - """ - - return self.last_match.group(num) - - -class NestedMatch: - """ - Finding nested delimiters is hard with regular expressions. It is - even harder on Python with its normal re module, as there are several - advanced regular expressions that are missing. - - This is the case of this pattern: - - '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' - - which is used to properly match open/close parenthesis of the - string search STRUCT_GROUP(), - - Add a class that counts pairs of delimiters, using it to match and - replace nested expressions. - - The original approach was suggested by: - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - - Although I re-implemented it to make it more generic and match 3 types - of delimiters. The logic checks if delimiters are paired. If not, it - will ignore the search string. - """ - - # TODO: make NestedMatch handle multiple match groups - # - # Right now, regular expressions to match it are defined only up to - # the start delimiter, e.g.: - # - # \bSTRUCT_GROUP\( - # - # is similar to: STRUCT_GROUP\((.*)\) - # except that the content inside the match group is delimiter's aligned. - # - # The content inside parenthesis are converted into a single replace - # group (e.g. r`\1'). - # - # It would be nice to change such definition to support multiple - # match groups, allowing a regex equivalent to. - # - # FOO\((.*), (.*), (.*)\) - # - # it is probably easier to define it not as a regular expression, but - # with some lexical definition like: - # - # FOO(arg1, arg2, arg3) - - DELIMITER_PAIRS = { - '{': '}', - '(': ')', - '[': ']', - } - - RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') - - def _search(self, regex, line): - """ - Finds paired blocks for a regex that ends with a delimiter. - - The suggestion of using finditer to match pairs came from: - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - but I ended using a different implementation to align all three types - of delimiters and seek for an initial regular expression. - - The algorithm seeks for open/close paired delimiters and place them - into a stack, yielding a start/stop position of each match when the - stack is zeroed. - - The algorithm shoud work fine for properly paired lines, but will - silently ignore end delimiters that preceeds an start delimiter. - This should be OK for kernel-doc parser, as unaligned delimiters - would cause compilation errors. So, we don't need to rise exceptions - to cover such issues. - """ - - stack = [] - - for match_re in regex.finditer(line): - start = match_re.start() - offset = match_re.end() - - d = line[offset - 1] - if d not in self.DELIMITER_PAIRS: - continue - - end = self.DELIMITER_PAIRS[d] - stack.append(end) - - for match in self.RE_DELIM.finditer(line[offset:]): - pos = match.start() + offset - - d = line[pos] - - if d in self.DELIMITER_PAIRS: - end = self.DELIMITER_PAIRS[d] - - stack.append(end) - continue - - # Does the end delimiter match what it is expected? - if stack and d == stack[-1]: - stack.pop() - - if not stack: - yield start, offset, pos + 1 - break - - def search(self, regex, line): - """ - This is similar to re.search: - - It matches a regex that it is followed by a delimiter, - returning occurrences only if all delimiters are paired. - """ - - for t in self._search(regex, line): - - yield line[t[0]:t[2]] - - def sub(self, regex, sub, line, count=0): - """ - This is similar to re.sub: - - It matches a regex that it is followed by a delimiter, - replacing occurrences only if all delimiters are paired. - - if r'\1' is used, it works just like re: it places there the - matched paired data with the delimiter stripped. - - If count is different than zero, it will replace at most count - items. - """ - out = "" - - cur_pos = 0 - n = 0 - - for start, end, pos in self._search(regex, line): - out += line[cur_pos:start] - - # Value, ignoring start/end delimiters - value = line[end:pos - 1] - - # replaces \1 at the sub string, if \1 is used there - new_sub = sub - new_sub = new_sub.replace(r'\1', value) - - out += new_sub - - # Drop end ';' if any - if line[pos] == ';': - pos += 1 - - cur_pos = pos - n += 1 - - if count and count >= n: - break - - # Append the remaining string - l = len(line) - out += line[cur_pos:l] - - return out diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py index c0997d6861dc..c48bb05dad82 100755 --- a/tools/docs/check-variable-fonts.py +++ b/tools/docs/check-variable-fonts.py @@ -9,13 +9,17 @@ """ Detect problematic Noto CJK variable fonts. -or more details, see lib/latex_fonts.py. +or more details, see .../tools/lib/python/kdoc/latex_fonts.py. """ import argparse import sys +import os.path -from lib.latex_fonts import LatexFontChecker +src_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) + +from latex_fonts import LatexFontChecker checker = LatexFontChecker() diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py index da69e77559cc..e0abfe12fac7 100755 --- a/tools/docs/get_abi.py +++ b/tools/docs/get_abi.py @@ -14,7 +14,7 @@ import sys # Import Python modules -LIB_DIR = "../../scripts/lib/abi" +LIB_DIR = "../lib/python/abi" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) diff --git a/tools/docs/lib/__init__.py b/tools/docs/lib/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tools/docs/lib/enrich_formatter.py b/tools/docs/lib/enrich_formatter.py deleted file mode 100644 index bb171567a4ca..000000000000 --- a/tools/docs/lib/enrich_formatter.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright (c) 2025 by Mauro Carvalho Chehab . - -""" -Ancillary argparse HelpFormatter class that works on a similar way as -argparse.RawDescriptionHelpFormatter, e.g. description maintains line -breaks, but it also implement transformations to the help text. The -actual transformations ar given by enrich_text(), if the output is tty. - -Currently, the follow transformations are done: - - - Positional arguments are shown in upper cases; - - if output is TTY, ``var`` and positional arguments are shown prepended - by an ANSI SGR code. This is usually translated to bold. On some - terminals, like, konsole, this is translated into a colored bold text. -""" - -import argparse -import re -import sys - -class EnrichFormatter(argparse.HelpFormatter): - """ - Better format the output, making easier to identify the positional args - and how they're used at the __doc__ description. - """ - def __init__(self, *args, **kwargs): - """Initialize class and check if is TTY""" - super().__init__(*args, **kwargs) - self._tty = sys.stdout.isatty() - - def enrich_text(self, text): - """Handle ReST markups (currently, only ``foo``)""" - if self._tty and text: - # Replace ``text`` with ANSI SGR (bold) - return re.sub(r'\`\`(.+?)\`\`', - lambda m: f'\033[1m{m.group(1)}\033[0m', text) - return text - - def _fill_text(self, text, width, indent): - """Enrich descriptions with markups on it""" - enriched = self.enrich_text(text) - return "\n".join(indent + line for line in enriched.splitlines()) - - def _format_usage(self, usage, actions, groups, prefix): - """Enrich positional arguments at usage: line""" - - prog = self._prog - parts = [] - - for action in actions: - if action.option_strings: - opt = action.option_strings[0] - if action.nargs != 0: - opt += f" {action.dest.upper()}" - parts.append(f"[{opt}]") - else: - # Positional argument - parts.append(self.enrich_text(f"``{action.dest.upper()}``")) - - usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n" - return usage_text - - def _format_action_invocation(self, action): - """Enrich argument names""" - if not action.option_strings: - return self.enrich_text(f"``{action.dest.upper()}``") - - return ", ".join(action.option_strings) diff --git a/tools/docs/lib/latex_fonts.py b/tools/docs/lib/latex_fonts.py deleted file mode 100755 index 29317f8006ea..000000000000 --- a/tools/docs/lib/latex_fonts.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-only -# Copyright (C) Akira Yokosawa, 2024 -# -# Ported to Python by (c) Mauro Carvalho Chehab, 2025 - -""" -Detect problematic Noto CJK variable fonts. - -For "make pdfdocs", reports of build errors of translations.pdf started -arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE -tumbleweed have started deploying variable-font [3] format of "Noto CJK" -fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK -(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which -does not (and likely never will) understand variable fonts for historical -reasons. - -The build error happens even when both of variable- and non-variable-format -fonts are found on the build system. To make matters worse, Fedora enlists -variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, --zh_TW, etc. Hence developers who have interest in CJK pages are more -likely to encounter the build errors. - -This script is invoked from the error path of "make pdfdocs" and emits -suggestions if variable-font files of "Noto CJK" fonts are in the list of -fonts accessible from XeTeX. - -References: -[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ -[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ -[3]: https://en.wikipedia.org/wiki/Variable_font -[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts -[5]: https://build.opensuse.org/request/show/1157217 - -#=========================================================================== -Workarounds for building translations.pdf -#=========================================================================== - -* Denylist "variable font" Noto CJK fonts. - - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with - tweaks if necessary. Remove leading "". - - Path of fontconfig/fonts.conf can be overridden by setting an env - variable FONTS_CONF_DENY_VF. - - * Template: ------------------------------------------------------------------ - - - - - - - - /usr/share/fonts/google-noto-*-cjk-vf-fonts - - /usr/share/fonts/truetype/Noto*CJK*-VF.otf - - - ------------------------------------------------------------------ - - The denylisting is activated for "make pdfdocs". - -* For skipping CJK pages in PDF - - Uninstall texlive-xecjk. - Denylisting is not needed in this case. - -* For printing CJK pages in PDF - - Need non-variable "Noto CJK" fonts. - * Fedora - - google-noto-sans-cjk-fonts - - google-noto-serif-cjk-fonts - * openSUSE tumbleweed - - Non-variable "Noto CJK" fonts are not available as distro packages - as of April, 2024. Fetch a set of font files from upstream Noto - CJK Font released at: - https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc - and at: - https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc - , then uncompress and deploy them. - - Remember to update fontconfig cache by running fc-cache. - -!!! Caution !!! - Uninstalling "variable font" packages can be dangerous. - They might be depended upon by other packages important for your work. - Denylisting should be less invasive, as it is effective only while - XeLaTeX runs in "make pdfdocs". -""" - -import os -import re -import subprocess -import textwrap -import sys - -class LatexFontChecker: - """ - Detect problems with CJK variable fonts that affect PDF builds for - translations. - """ - - def __init__(self, deny_vf=None): - if not deny_vf: - deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") - - self.environ = os.environ.copy() - self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) - - self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") - - def description(self): - return __doc__ - - def get_noto_cjk_vf_fonts(self): - """Get Noto CJK fonts""" - - cjk_fonts = set() - cmd = ["fc-list", ":", "file", "family", "variable"] - try: - result = subprocess.run(cmd,stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - env=self.environ, - check=True) - - except subprocess.CalledProcessError as exc: - sys.exit(f"Error running fc-list: {repr(exc)}") - - for line in result.stdout.splitlines(): - if 'variable=True' not in line: - continue - - match = self.re_cjk.search(line) - if match: - cjk_fonts.add(match.group(1)) - - return sorted(cjk_fonts) - - def check(self): - """Check for problems with CJK fonts""" - - fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") - if not fonts: - return None - - rel_file = os.path.relpath(__file__, os.getcwd()) - - msg = "=" * 77 + "\n" - msg += 'XeTeX is confused by "variable font" files listed below:\n' - msg += fonts + "\n" - msg += textwrap.dedent(f""" - For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. - Or, CJK pages can be skipped by uninstalling texlive-xecjk. - - For more info on denylisting, other options, and variable font, run: - - tools/docs/check-variable-fonts.py -h - """) - msg += "=" * 77 - - return msg diff --git a/tools/docs/lib/parse_data_structs.py b/tools/docs/lib/parse_data_structs.py deleted file mode 100755 index 25361996cd20..000000000000 --- a/tools/docs/lib/parse_data_structs.py +++ /dev/null @@ -1,482 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright (c) 2016-2025 by Mauro Carvalho Chehab . -# pylint: disable=R0912,R0915 - -""" -Parse a source file or header, creating ReStructured Text cross references. - -It accepts an optional file to change the default symbol reference or to -suppress symbols from the output. - -It is capable of identifying defines, functions, structs, typedefs, -enums and enum symbols and create cross-references for all of them. -It is also capable of distinguish #define used for specifying a Linux -ioctl. - -The optional rules file contains a set of rules like: - - ignore ioctl VIDIOC_ENUM_FMT - replace ioctl VIDIOC_DQBUF vidioc_qbuf - replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` -""" - -import os -import re -import sys - - -class ParseDataStructs: - """ - Creates an enriched version of a Kernel header file with cross-links - to each C data structure type. - - It is meant to allow having a more comprehensive documentation, where - uAPI headers will create cross-reference links to the code. - - It is capable of identifying defines, functions, structs, typedefs, - enums and enum symbols and create cross-references for all of them. - It is also capable of distinguish #define used for specifying a Linux - ioctl. - - By default, it create rules for all symbols and defines, but it also - allows parsing an exception file. Such file contains a set of rules - using the syntax below: - - 1. Ignore rules: - - ignore ` - - Removes the symbol from reference generation. - - 2. Replace rules: - - replace - - Replaces how old_symbol with a new reference. The new_reference can be: - - - A simple symbol name; - - A full Sphinx reference. - - 3. Namespace rules - - namespace - - Sets C namespace to be used during cross-reference generation. Can - be overridden by replace rules. - - On ignore and replace rules, can be: - - ioctl: for defines that end with _IO*, e.g. ioctl definitions - - define: for other defines - - symbol: for symbols defined within enums; - - typedef: for typedefs; - - enum: for the name of a non-anonymous enum; - - struct: for structs. - - Examples: - - ignore define __LINUX_MEDIA_H - ignore ioctl VIDIOC_ENUM_FMT - replace ioctl VIDIOC_DQBUF vidioc_qbuf - replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` - - namespace MC - """ - - # Parser regexes with multiple ways to capture enums and structs - RE_ENUMS = [ - re.compile(r"^\s*enum\s+([\w_]+)\s*\{"), - re.compile(r"^\s*enum\s+([\w_]+)\s*$"), - re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"), - re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"), - ] - RE_STRUCTS = [ - re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"), - re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"), - re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"), - re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"), - ] - - # FIXME: the original code was written a long time before Sphinx C - # domain to have multiple namespaces. To avoid to much turn at the - # existing hyperlinks, the code kept using "c:type" instead of the - # right types. To change that, we need to change the types not only - # here, but also at the uAPI media documentation. - DEF_SYMBOL_TYPES = { - "ioctl": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":ref", - "description": "IOCTL Commands", - }, - "define": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":ref", - "description": "Macros and Definitions", - }, - # We're calling each definition inside an enum as "symbol" - "symbol": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":ref", - "description": "Enumeration values", - }, - "typedef": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":c:type", - "description": "Type Definitions", - }, - # This is the description of the enum itself - "enum": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":c:type", - "description": "Enumerations", - }, - "struct": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":c:type", - "description": "Structures", - }, - } - - def __init__(self, debug: bool = False): - """Initialize internal vars""" - self.debug = debug - self.data = "" - - self.symbols = {} - - self.namespace = None - self.ignore = [] - self.replace = [] - - for symbol_type in self.DEF_SYMBOL_TYPES: - self.symbols[symbol_type] = {} - - def read_exceptions(self, fname: str): - if not fname: - return - - name = os.path.basename(fname) - - with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f: - for ln, line in enumerate(f): - ln += 1 - line = line.strip() - if not line or line.startswith("#"): - continue - - # ignore rules - match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line) - - if match: - self.ignore.append((ln, match.group(1), match.group(2))) - continue - - # replace rules - match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line) - if match: - self.replace.append((ln, match.group(1), match.group(2), - match.group(3))) - continue - - match = re.match(r"^namespace\s+(\S+)", line) - if match: - self.namespace = match.group(1) - continue - - sys.exit(f"{name}:{ln}: invalid line: {line}") - - def apply_exceptions(self): - """ - Process exceptions file with rules to ignore or replace references. - """ - - # Handle ignore rules - for ln, c_type, symbol in self.ignore: - if c_type not in self.DEF_SYMBOL_TYPES: - sys.exit(f"{name}:{ln}: {c_type} is invalid") - - d = self.symbols[c_type] - if symbol in d: - del d[symbol] - - # Handle replace rules - for ln, c_type, old, new in self.replace: - if c_type not in self.DEF_SYMBOL_TYPES: - sys.exit(f"{name}:{ln}: {c_type} is invalid") - - reftype = None - - # Parse reference type when the type is specified - - match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new) - if match: - reftype = f":c:{match.group(1)}" - new = match.group(2) - else: - match = re.search(r"(\:ref)\:\`(.+)\`", new) - if match: - reftype = match.group(1) - new = match.group(2) - - # If the replacement rule doesn't have a type, get default - if not reftype: - reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type") - if not reftype: - reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type") - - new_ref = f"{reftype}:`{old} <{new}>`" - - # Change self.symbols to use the replacement rule - if old in self.symbols[c_type]: - (_, ln) = self.symbols[c_type][old] - self.symbols[c_type][old] = (new_ref, ln) - else: - print(f"{name}:{ln}: Warning: can't find {old} {c_type}") - - def store_type(self, ln, symbol_type: str, symbol: str, - ref_name: str = None, replace_underscores: bool = True): - """ - Stores a new symbol at self.symbols under symbol_type. - - By default, underscores are replaced by "-" - """ - defs = self.DEF_SYMBOL_TYPES[symbol_type] - - prefix = defs.get("prefix", "") - suffix = defs.get("suffix", "") - ref_type = defs.get("ref_type") - - # Determine ref_link based on symbol type - if ref_type or self.namespace: - if not ref_name: - ref_name = symbol.lower() - - # c-type references don't support hash - if ref_type == ":ref" and replace_underscores: - ref_name = ref_name.replace("_", "-") - - # C domain references may have namespaces - if ref_type.startswith(":c:"): - if self.namespace: - ref_name = f"{self.namespace}.{ref_name}" - - if ref_type: - ref_link = f"{ref_type}:`{symbol} <{ref_name}>`" - else: - ref_link = f"`{symbol} <{ref_name}>`" - else: - ref_link = symbol - - self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln) - - def store_line(self, line): - """Stores a line at self.data, properly indented""" - line = " " + line.expandtabs() - self.data += line.rstrip(" ") - - def parse_file(self, file_in: str, exceptions: str = None): - """Reads a C source file and get identifiers""" - self.data = "" - is_enum = False - is_comment = False - multiline = "" - - self.read_exceptions(exceptions) - - with open(file_in, "r", - encoding="utf-8", errors="backslashreplace") as f: - for line_no, line in enumerate(f): - self.store_line(line) - line = line.strip("\n") - - # Handle continuation lines - if line.endswith(r"\\"): - multiline += line[-1] - continue - - if multiline: - line = multiline + line - multiline = "" - - # Handle comments. They can be multilined - if not is_comment: - if re.search(r"/\*.*", line): - is_comment = True - else: - # Strip C99-style comments - line = re.sub(r"(//.*)", "", line) - - if is_comment: - if re.search(r".*\*/", line): - is_comment = False - else: - multiline = line - continue - - # At this point, line variable may be a multilined statement, - # if lines end with \ or if they have multi-line comments - # With that, it can safely remove the entire comments, - # and there's no need to use re.DOTALL for the logic below - - line = re.sub(r"(/\*.*\*/)", "", line) - if not line.strip(): - continue - - # It can be useful for debug purposes to print the file after - # having comments stripped and multi-lines grouped. - if self.debug > 1: - print(f"line {line_no + 1}: {line}") - - # Now the fun begins: parse each type and store it. - - # We opted for a two parsing logic here due to: - # 1. it makes easier to debug issues not-parsed symbols; - # 2. we want symbol replacement at the entire content, not - # just when the symbol is detected. - - if is_enum: - match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line) - if match: - self.store_type(line_no, "symbol", match.group(1)) - if "}" in line: - is_enum = False - continue - - match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line) - if match: - self.store_type(line_no, "ioctl", match.group(1), - replace_underscores=False) - continue - - match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line) - if match: - self.store_type(line_no, "define", match.group(1)) - continue - - match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);", - line) - if match: - name = match.group(2).strip() - symbol = match.group(3) - self.store_type(line_no, "typedef", symbol, ref_name=name) - continue - - for re_enum in self.RE_ENUMS: - match = re_enum.match(line) - if match: - self.store_type(line_no, "enum", match.group(1)) - is_enum = True - break - - for re_struct in self.RE_STRUCTS: - match = re_struct.match(line) - if match: - self.store_type(line_no, "struct", match.group(1)) - break - - self.apply_exceptions() - - def debug_print(self): - """ - Print debug information containing the replacement rules per symbol. - To make easier to check, group them per type. - """ - if not self.debug: - return - - for c_type, refs in self.symbols.items(): - if not refs: # Skip empty dictionaries - continue - - print(f"{c_type}:") - - for symbol, (ref, ln) in sorted(refs.items()): - print(f" #{ln:<5d} {symbol} -> {ref}") - - print() - - def gen_output(self): - """Write the formatted output to a file.""" - - # Avoid extra blank lines - text = re.sub(r"\s+$", "", self.data) + "\n" - text = re.sub(r"\n\s+\n", "\n\n", text) - - # Escape Sphinx special characters - text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text) - - # Source uAPI files may have special notes. Use bold font for them - text = re.sub(r"DEPRECATED", "**DEPRECATED**", text) - - # Delimiters to catch the entire symbol after escaped - start_delim = r"([ \n\t\(=\*\@])" - end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)" - - # Process all reference types - for ref_dict in self.symbols.values(): - for symbol, (replacement, _) in ref_dict.items(): - symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol)) - text = re.sub(fr'{start_delim}{symbol}{end_delim}', - fr'\1{replacement}\2', text) - - # Remove "\ " where not needed: before spaces and at the end of lines - text = re.sub(r"\\ ([\n ])", r"\1", text) - text = re.sub(r" \\ ", " ", text) - - return text - - def gen_toc(self): - """ - Create a list of symbols to be part of a TOC contents table - """ - text = [] - - # Sort symbol types per description - symbol_descriptions = [] - for k, v in self.DEF_SYMBOL_TYPES.items(): - symbol_descriptions.append((v['description'], k)) - - symbol_descriptions.sort() - - # Process each category - for description, c_type in symbol_descriptions: - - refs = self.symbols[c_type] - if not refs: # Skip empty categories - continue - - text.append(f"{description}") - text.append("-" * len(description)) - text.append("") - - # Sort symbols alphabetically - for symbol, (ref, ln) in sorted(refs.items()): - text.append(f"- LINENO_{ln}: {ref}") - - text.append("") # Add empty line between categories - - return "\n".join(text) - - def write_output(self, file_in: str, file_out: str, toc: bool): - title = os.path.basename(file_in) - - if toc: - text = self.gen_toc() - else: - text = self.gen_output() - - with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f: - f.write(".. -*- coding: utf-8; mode: rst -*-\n\n") - f.write(f"{title}\n") - f.write("=" * len(title) + "\n\n") - - if not toc: - f.write(".. parsed-literal::\n\n") - - f.write(text) diff --git a/tools/docs/lib/python_version.py b/tools/docs/lib/python_version.py deleted file mode 100644 index 4fde1b882164..000000000000 --- a/tools/docs/lib/python_version.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-or-later -# Copyright (c) 2017-2025 Mauro Carvalho Chehab - -""" -Handle Python version check logic. - -Not all Python versions are supported by scripts. Yet, on some cases, -like during documentation build, a newer version of python could be -available. - -This class allows checking if the minimal requirements are followed. - -Better than that, PythonVersion.check_python() not only checks the minimal -requirements, but it automatically switches to a the newest available -Python version if present. - -""" - -import os -import re -import subprocess -import shlex -import sys - -from glob import glob -from textwrap import indent - -class PythonVersion: - """ - Ancillary methods that checks for missing dependencies for different - types of types, like binaries, python modules, rpm deps, etc. - """ - - def __init__(self, version): - """Ïnitialize self.version tuple from a version string""" - self.version = self.parse_version(version) - - @staticmethod - def parse_version(version): - """Convert a major.minor.patch version into a tuple""" - return tuple(int(x) for x in version.split(".")) - - @staticmethod - def ver_str(version): - """Returns a version tuple as major.minor.patch""" - return ".".join([str(x) for x in version]) - - @staticmethod - def cmd_print(cmd, max_len=80): - cmd_line = [] - - for w in cmd: - w = shlex.quote(w) - - if cmd_line: - if not max_len or len(cmd_line[-1]) + len(w) < max_len: - cmd_line[-1] += " " + w - continue - else: - cmd_line[-1] += " \\" - cmd_line.append(w) - else: - cmd_line.append(w) - - return "\n ".join(cmd_line) - - def __str__(self): - """Returns a version tuple as major.minor.patch from self.version""" - return self.ver_str(self.version) - - @staticmethod - def get_python_version(cmd): - """ - Get python version from a Python binary. As we need to detect if - are out there newer python binaries, we can't rely on sys.release here. - """ - - kwargs = {} - if sys.version_info < (3, 7): - kwargs['universal_newlines'] = True - else: - kwargs['text'] = True - - result = subprocess.run([cmd, "--version"], - stdout = subprocess.PIPE, - stderr = subprocess.PIPE, - **kwargs, check=False) - - version = result.stdout.strip() - - match = re.search(r"(\d+\.\d+\.\d+)", version) - if match: - return PythonVersion.parse_version(match.group(1)) - - print(f"Can't parse version {version}") - return (0, 0, 0) - - @staticmethod - def find_python(min_version): - """ - Detect if are out there any python 3.xy version newer than the - current one. - - Note: this routine is limited to up to 2 digits for python3. We - may need to update it one day, hopefully on a distant future. - """ - patterns = [ - "python3.[0-9][0-9]", - "python3.[0-9]", - ] - - python_cmd = [] - - # Seek for a python binary newer than min_version - for path in os.getenv("PATH", "").split(":"): - for pattern in patterns: - for cmd in glob(os.path.join(path, pattern)): - if os.path.isfile(cmd) and os.access(cmd, os.X_OK): - version = PythonVersion.get_python_version(cmd) - if version >= min_version: - python_cmd.append((version, cmd)) - - return sorted(python_cmd, reverse=True) - - @staticmethod - def check_python(min_version, show_alternatives=False, bail_out=False, - success_on_error=False): - """ - Check if the current python binary satisfies our minimal requirement - for Sphinx build. If not, re-run with a newer version if found. - """ - cur_ver = sys.version_info[:3] - if cur_ver >= min_version: - ver = PythonVersion.ver_str(cur_ver) - return - - python_ver = PythonVersion.ver_str(cur_ver) - - available_versions = PythonVersion.find_python(min_version) - if not available_versions: - print(f"ERROR: Python version {python_ver} is not spported anymore\n") - print(" Can't find a new version. This script may fail") - return - - script_path = os.path.abspath(sys.argv[0]) - - # Check possible alternatives - if available_versions: - new_python_cmd = available_versions[0][1] - else: - new_python_cmd = None - - if show_alternatives and available_versions: - print("You could run, instead:") - for _, cmd in available_versions: - args = [cmd, script_path] + sys.argv[1:] - - cmd_str = indent(PythonVersion.cmd_print(args), " ") - print(f"{cmd_str}\n") - - if bail_out: - msg = f"Python {python_ver} not supported. Bailing out" - if success_on_error: - print(msg, file=sys.stderr) - sys.exit(0) - else: - sys.exit(msg) - - print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") - - # Restart script using the newer version - args = [new_python_cmd, script_path] + sys.argv[1:] - - try: - os.execv(new_python_cmd, args) - except OSError as e: - sys.exit(f"Failed to restart with {new_python_cmd}: {e}") diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py index 6716c7300258..ed9cf2bf22de 100755 --- a/tools/docs/parse-headers.py +++ b/tools/docs/parse-headers.py @@ -24,10 +24,13 @@ The optional ``FILE_RULES`` contains a set of rules like: replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` """ -import argparse +import argparse, sys +import os.path -from lib.parse_data_structs import ParseDataStructs -from lib.enrich_formatter import EnrichFormatter +src_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) +from parse_data_structs import ParseDataStructs +from enrich_formatter import EnrichFormatter def main(): """Main function""" diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index 1efaca3d16aa..ce0b1b5292da 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -56,14 +56,15 @@ import sys from concurrent import futures from glob import glob -from lib.python_version import PythonVersion -from lib.latex_fonts import LatexFontChecker -LIB_DIR = "../../scripts/lib" +LIB_DIR = "../lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) +sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR + '/kdoc')) # temporary +from python_version import PythonVersion +from latex_fonts import LatexFontChecker from jobserver import JobserverExec # pylint: disable=C0413,C0411,E0401 # diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install index 647e1f60357f..d8c9fb76948d 100755 --- a/tools/docs/sphinx-pre-install +++ b/tools/docs/sphinx-pre-install @@ -32,8 +32,11 @@ import re import subprocess import sys from glob import glob +import os.path -from lib.python_version import PythonVersion +src_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) +from python_version import PythonVersion RECOMMENDED_VERSION = PythonVersion("3.4.3").version MIN_PYTHON_VERSION = PythonVersion("3.7").version diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py new file mode 100644 index 000000000000..66a738013ce1 --- /dev/null +++ b/tools/lib/python/abi/abi_parser.py @@ -0,0 +1,628 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. +""" + +from argparse import Namespace +import logging +import os +import re + +from pprint import pformat +from random import randrange, seed + +# Import Python modules + +from helpers import AbiDebug, ABI_DIR + + +class AbiParser: + """Main class to parse ABI files""" + + TAGS = r"(what|where|date|kernelversion|contact|description|users)" + XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)" + + def __init__(self, directory, logger=None, + enable_lineno=False, show_warnings=True, debug=0): + """Stores arguments for the class and initialize class vars""" + + self.directory = directory + self.enable_lineno = enable_lineno + self.show_warnings = show_warnings + self.debug = debug + + if not logger: + self.log = logging.getLogger("get_abi") + else: + self.log = logger + + self.data = {} + self.what_symbols = {} + self.file_refs = {} + self.what_refs = {} + + # Ignore files that contain such suffixes + self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~") + + # Regular expressions used on parser + self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR) + self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I) + self.re_valid = re.compile(self.TAGS) + self.re_start_spc = re.compile(r"(\s*)(\S.*)") + self.re_whitespace = re.compile(r"^\s+") + + # Regular used on print + self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})") + self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])") + self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)") + self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n") + self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst") + self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)") + self.re_xref_node = re.compile(self.XREF) + + def warn(self, fdata, msg, extra=None): + """Displays a parse error if warning is enabled""" + + if not self.show_warnings: + return + + msg = f"{fdata.fname}:{fdata.ln}: {msg}" + if extra: + msg += "\n\t\t" + extra + + self.log.warning(msg) + + def add_symbol(self, what, fname, ln=None, xref=None): + """Create a reference table describing where each 'what' is located""" + + if what not in self.what_symbols: + self.what_symbols[what] = {"file": {}} + + if fname not in self.what_symbols[what]["file"]: + self.what_symbols[what]["file"][fname] = [] + + if ln and ln not in self.what_symbols[what]["file"][fname]: + self.what_symbols[what]["file"][fname].append(ln) + + if xref: + self.what_symbols[what]["xref"] = xref + + def _parse_line(self, fdata, line): + """Parse a single line of an ABI file""" + + new_what = False + new_tag = False + content = None + + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + sep = match.group(2) + content = match.group(3) + + match = self.re_valid.search(new) + if match: + new_tag = match.group(1) + else: + if fdata.tag == "description": + # New "tag" is actually part of description. + # Don't consider it a tag + new_tag = False + elif fdata.tag != "": + self.warn(fdata, f"tag '{fdata.tag}' is invalid", line) + + if new_tag: + # "where" is Invalid, but was a common mistake. Warn if found + if new_tag == "where": + self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead") + new_tag = "what" + + if new_tag == "what": + fdata.space = None + + if content not in self.what_symbols: + self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln) + + if fdata.tag == "what": + fdata.what.append(content.strip("\n")) + else: + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fdata.fname, + ln=fdata.what_ln, xref=fdata.key) + + fdata.label = content + new_what = True + + key = "abi_" + content.lower() + fdata.key = self.re_unprintable.sub("_", key).strip("_") + + # Avoid duplicated keys but using a defined seed, to make + # the namespace identical if there aren't changes at the + # ABI symbols + seed(42) + + while fdata.key in self.data: + char = randrange(0, 51) + ord("A") + if char > ord("Z"): + char += ord("a") - ord("Z") - 1 + + fdata.key += chr(char) + + if fdata.key and fdata.key not in self.data: + self.data[fdata.key] = { + "what": [content], + "file": [fdata.file_ref], + "path": fdata.ftype, + "line_no": fdata.ln, + } + + fdata.what = self.data[fdata.key]["what"] + + self.what_refs[content] = fdata.key + fdata.tag = new_tag + fdata.what_ln = fdata.ln + + if fdata.nametag["what"]: + t = (content, fdata.key) + if t not in fdata.nametag["symbols"]: + fdata.nametag["symbols"].append(t) + + return + + if fdata.tag and new_tag: + fdata.tag = new_tag + + if new_what: + fdata.label = "" + + if "description" in self.data[fdata.key]: + self.data[fdata.key]["description"] += "\n\n" + + if fdata.file_ref not in self.data[fdata.key]["file"]: + self.data[fdata.key]["file"].append(fdata.file_ref) + + if self.debug == AbiDebug.WHAT_PARSING: + self.log.debug("what: %s", fdata.what) + + if not fdata.what: + self.warn(fdata, "'What:' should come first:", line) + return + + if new_tag == "description": + fdata.space = None + + if content: + sep = sep.replace(":", " ") + + c = " " * len(new_tag) + sep + content + c = c.expandtabs() + + match = self.re_start_spc.match(c) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + content = match.group(2) + "\n" + + self.data[fdata.key][fdata.tag] = content + + return + + # Store any contents before tags at the database + if not fdata.tag and "what" in fdata.nametag: + fdata.nametag["description"] += line + return + + if fdata.tag == "description": + content = line.expandtabs() + + if self.re_whitespace.sub("", content) == "": + self.data[fdata.key][fdata.tag] += "\n" + return + + if fdata.space is None: + match = self.re_start_spc.match(content) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + + content = match.group(2) + "\n" + else: + if content.startswith(fdata.space): + content = content[len(fdata.space):] + + else: + fdata.space = "" + + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += content + return + + content = line.strip() + if fdata.tag: + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n") + return + + # Everything else is error + if content: + self.warn(fdata, "Unexpected content", line) + + def parse_readme(self, nametag, fname): + """Parse ABI README file""" + + nametag["what"] = ["Introduction"] + nametag["path"] = "README" + with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + + match = self.re_valid.search(new) + if match: + nametag["description"] += "\n:" + line + continue + + nametag["description"] += line + + def parse_file(self, fname, path, basename): + """Parse a single file""" + + ref = f"abi_file_{path}_{basename}" + ref = self.re_unprintable.sub("_", ref).strip("_") + + # Store per-file state into a namespace variable. This will be used + # by the per-line parser state machine and by the warning function. + fdata = Namespace + + fdata.fname = fname + fdata.name = basename + + pos = fname.find(ABI_DIR) + if pos > 0: + f = fname[pos:] + else: + f = fname + + fdata.file_ref = (f, ref) + self.file_refs[f] = ref + + fdata.ln = 0 + fdata.what_ln = 0 + fdata.tag = "" + fdata.label = "" + fdata.what = [] + fdata.key = None + fdata.xrefs = None + fdata.space = None + fdata.ftype = path.split("/")[0] + + fdata.nametag = {} + fdata.nametag["what"] = [f"ABI file {path}/{basename}"] + fdata.nametag["type"] = "File" + fdata.nametag["path"] = fdata.ftype + fdata.nametag["file"] = [fdata.file_ref] + fdata.nametag["line_no"] = 1 + fdata.nametag["description"] = "" + fdata.nametag["symbols"] = [] + + self.data[ref] = fdata.nametag + + if self.debug & AbiDebug.WHAT_OPEN: + self.log.debug("Opening file %s", fname) + + if basename == "README": + self.parse_readme(fdata.nametag, fname) + return + + with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + fdata.ln += 1 + + self._parse_line(fdata, line) + + if "description" in fdata.nametag: + fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n") + + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fname, xref=fdata.key) + + def _parse_abi(self, root=None): + """Internal function to parse documentation ABI recursively""" + + if not root: + root = self.directory + + with os.scandir(root) as obj: + for entry in obj: + name = os.path.join(root, entry.name) + + if entry.is_dir(): + self._parse_abi(name) + continue + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if basename.startswith("."): + continue + + if basename.endswith(self.ignore_suffixes): + continue + + path = self.re_abi_dir.sub("", os.path.dirname(name)) + + self.parse_file(name, path, basename) + + def parse_abi(self, root=None): + """Parse documentation ABI""" + + self._parse_abi(root) + + if self.debug & AbiDebug.DUMP_ABI_STRUCTS: + self.log.debug(pformat(self.data)) + + def desc_txt(self, desc): + """Print description as found inside ABI files""" + + desc = desc.strip(" \t\n") + + return desc + "\n\n" + + def xref(self, fname): + """ + Converts a Documentation/ABI + basename into a ReST cross-reference + """ + + xref = self.file_refs.get(fname) + if not xref: + return None + else: + return xref + + def desc_rst(self, desc): + """Enrich ReST output by creating cross-references""" + + # Remove title markups from the description + # Having titles inside ABI files will only work if extra + # care would be taken in order to strictly follow the same + # level order for each markup. + desc = self.re_title_mark.sub("\n\n", "\n" + desc) + desc = desc.rstrip(" \t\n").lstrip("\n") + + # Python's regex performance for non-compiled expressions is a lot + # than Perl, as Perl automatically caches them at their + # first usage. Here, we'll need to do the same, as otherwise the + # performance penalty is be high + + new_desc = "" + for d in desc.split("\n"): + if d == "": + new_desc += "\n" + continue + + # Use cross-references for doc files where needed + d = self.re_doc.sub(r":doc:`/\1`", d) + + # Use cross-references for ABI generated docs where needed + matches = self.re_abi.findall(d) + for m in matches: + abi = m[0] + m[1] + + xref = self.file_refs.get(abi) + if not xref: + # This may happen if ABI is on a separate directory, + # like parsing ABI testing and symbol is at stable. + # The proper solution is to move this part of the code + # for it to be inside sphinx/kernel_abi.py + self.log.info("Didn't find ABI reference for '%s'", abi) + else: + new = self.re_escape.sub(r"\\\1", m[1]) + d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d) + + # Seek for cross reference symbols like /sys/... + # Need to be careful to avoid doing it on a code block + if d[0] not in [" ", "\t"]: + matches = self.re_xref_node.findall(d) + for m in matches: + # Finding ABI here is more complex due to wildcards + xref = self.what_refs.get(m) + if xref: + new = self.re_escape.sub(r"\\\1", m) + d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d) + + new_desc += d + "\n" + + return new_desc + "\n\n" + + def doc(self, output_in_txt=False, show_symbols=True, show_file=True, + filter_path=None): + """Print ABI at stdout""" + + part = None + for key, v in sorted(self.data.items(), + key=lambda x: (x[1].get("type", ""), + x[1].get("what"))): + + wtype = v.get("type", "Symbol") + file_ref = v.get("file") + names = v.get("what", [""]) + + if wtype == "File": + if not show_file: + continue + else: + if not show_symbols: + continue + + if filter_path: + if v.get("path") != filter_path: + continue + + msg = "" + + if wtype != "File": + cur_part = names[0] + if cur_part.find("/") >= 0: + match = self.re_what.match(cur_part) + if match: + symbol = match.group(1).rstrip("/") + cur_part = "Symbols under " + symbol + + if cur_part and cur_part != part: + part = cur_part + msg += part + "\n"+ "-" * len(part) +"\n\n" + + msg += f".. _{key}:\n\n" + + max_len = 0 + for i in range(0, len(names)): # pylint: disable=C0200 + names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**" + + max_len = max(max_len, len(names[i])) + + msg += "+-" + "-" * max_len + "-+\n" + for name in names: + msg += f"| {name}" + " " * (max_len - len(name)) + " |\n" + msg += "+-" + "-" * max_len + "-+\n" + msg += "\n" + + for ref in file_ref: + if wtype == "File": + msg += f".. _{ref[1]}:\n\n" + else: + base = os.path.basename(ref[0]) + msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n" + + if wtype == "File": + msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n" + + desc = v.get("description") + if not desc and wtype != "File": + msg += f"DESCRIPTION MISSING for {names[0]}\n\n" + + if desc: + if output_in_txt: + msg += self.desc_txt(desc) + else: + msg += self.desc_rst(desc) + + symbols = v.get("symbols") + if symbols: + msg += "Has the following ABI:\n\n" + + for w, label in symbols: + # Escape special chars from content + content = self.re_escape.sub(r"\\\1", w) + + msg += f"- :ref:`{content} <{label}>`\n\n" + + users = v.get("users") + if users and users.strip(" \t\n"): + users = users.strip("\n").replace('\n', '\n\t') + msg += f"Users:\n\t{users}\n\n" + + ln = v.get("line_no", 1) + + yield (msg, file_ref[0][0], ln) + + def check_issues(self): + """Warn about duplicated ABI entries""" + + for what, v in self.what_symbols.items(): + files = v.get("file") + if not files: + # Should never happen if the parser works properly + self.log.warning("%s doesn't have a file associated", what) + continue + + if len(files) == 1: + continue + + f = [] + for fname, lines in sorted(files.items()): + if not lines: + f.append(f"{fname}") + elif len(lines) == 1: + f.append(f"{fname}:{lines[0]}") + else: + m = fname + "lines " + m += ", ".join(str(x) for x in lines) + f.append(m) + + self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f)) + + def search_symbols(self, expr): + """ Searches for ABI symbols """ + + regex = re.compile(expr, re.I) + + found_keys = 0 + for t in sorted(self.data.items(), key=lambda x: [0]): + v = t[1] + + wtype = v.get("type", "") + if wtype == "File": + continue + + for what in v.get("what", [""]): + if regex.search(what): + found_keys += 1 + + kernelversion = v.get("kernelversion", "").strip(" \t\n") + date = v.get("date", "").strip(" \t\n") + contact = v.get("contact", "").strip(" \t\n") + users = v.get("users", "").strip(" \t\n") + desc = v.get("description", "").strip(" \t\n") + + files = [] + for f in v.get("file", ()): + files.append(f[0]) + + what = str(found_keys) + ". " + what + title_tag = "-" * len(what) + + print(f"\n{what}\n{title_tag}\n") + + if kernelversion: + print(f"Kernel version:\t\t{kernelversion}") + + if date: + print(f"Date:\t\t\t{date}") + + if contact: + print(f"Contact:\t\t{contact}") + + if users: + print(f"Users:\t\t\t{users}") + + print("Defined on file(s):\t" + ", ".join(files)) + + if desc: + desc = desc.strip("\n") + print(f"\n{desc}\n") + + if not found_keys: + print(f"Regular expression /{expr}/ not found.") diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py new file mode 100644 index 000000000000..8a57846cbc69 --- /dev/null +++ b/tools/lib/python/abi/abi_regex.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python3 +# xxpylint: disable=R0903 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# SPDX-License-Identifier: GPL-2.0 + +""" +Convert ABI what into regular expressions +""" + +import re +import sys + +from pprint import pformat + +from abi_parser import AbiParser +from helpers import AbiDebug + +class AbiRegex(AbiParser): + """Extends AbiParser to search ABI nodes with regular expressions""" + + # Escape only ASCII visible characters + escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])" + leave_others = "others" + + # Tuples with regular expressions to be compiled and replacement data + re_whats = [ + # Drop escape characters that might exist + (re.compile("\\\\"), ""), + + # Temporarily escape dot characters + (re.compile(r"\."), "\xf6"), + + # Temporarily change [0-9]+ type of patterns + (re.compile(r"\[0\-9\]\+"), "\xff"), + + # Temporarily change [\d+-\d+] type of patterns + (re.compile(r"\[0\-\d+\]"), "\xff"), + (re.compile(r"\[0:\d+\]"), "\xff"), + (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"), + + # Temporarily change [0-9] type of patterns + (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"), + + # Handle multiple option patterns + (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"), + + # Handle wildcards + (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"), + (re.compile(r"/\*/"), "/.*/"), + (re.compile(r"/\xf6\xf6\xf6"), "/.*"), + (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"), + (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"), + (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"), + + (re.compile(r"XX+"), "\\\\w\xf7"), + (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"), + (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"), + (re.compile(r"_[AB]_"), "_\\\\w\xf7_"), + + # Recover [0-9] type of patterns + (re.compile(r"\xf4"), "["), + (re.compile(r"\xf5"), "]"), + + # Remove duplicated spaces + (re.compile(r"\s+"), r" "), + + # Special case: drop comparison as in: + # What: foo = + # (this happens on a few IIO definitions) + (re.compile(r"\s*\=.*$"), ""), + + # Escape all other symbols + (re.compile(escape_symbols), r"\\\1"), + (re.compile(r"\\\\"), r"\\"), + (re.compile(r"\\([\[\]\(\)\|])"), r"\1"), + (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"), + + (re.compile(r"\xff"), r"\\d+"), + + # Special case: IIO ABI which a parenthesis. + (re.compile(r"sqrt(.*)"), r"sqrt(.*)"), + + # Simplify regexes with multiple .* + (re.compile(r"(?:\.\*){2,}"), ""), + + # Recover dot characters + (re.compile(r"\xf6"), "\\."), + # Recover plus characters + (re.compile(r"\xf7"), "+"), + ] + re_has_num = re.compile(r"\\d") + + # Symbol name after escape_chars that are considered a devnode basename + re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$") + + # List of popular group names to be skipped to minimize regex group size + # Use AbiDebug.SUBGROUP_SIZE to detect those + skip_names = set(["devices", "hwmon"]) + + def regex_append(self, what, new): + """ + Get a search group for a subset of regular expressions. + + As ABI may have thousands of symbols, using a for to search all + regular expressions is at least O(n^2). When there are wildcards, + the complexity increases substantially, eventually becoming exponential. + + To avoid spending too much time on them, use a logic to split + them into groups. The smaller the group, the better, as it would + mean that searches will be confined to a small number of regular + expressions. + + The conversion to a regex subset is tricky, as we need something + that can be easily obtained from the sysfs symbol and from the + regular expression. So, we need to discard nodes that have + wildcards. + + If it can't obtain a subgroup, place the regular expression inside + a special group (self.leave_others). + """ + + search_group = None + + for search_group in reversed(new.split("/")): + if not search_group or search_group in self.skip_names: + continue + if self.re_symbol_name.match(search_group): + break + + if not search_group: + search_group = self.leave_others + + if self.debug & AbiDebug.SUBGROUP_MAP: + self.log.debug("%s: mapped as %s", what, search_group) + + try: + if search_group not in self.regex_group: + self.regex_group[search_group] = [] + + self.regex_group[search_group].append(re.compile(new)) + if self.search_string: + if what.find(self.search_string) >= 0: + print(f"What: {what}") + except re.PatternError: + self.log.warning("Ignoring '%s' as it produced an invalid regex:\n" + " '%s'", what, new) + + def get_regexes(self, what): + """ + Given an ABI devnode, return a list of all regular expressions that + may match it, based on the sub-groups created by regex_append() + """ + + re_list = [] + + patches = what.split("/") + patches.reverse() + patches.append(self.leave_others) + + for search_group in patches: + if search_group in self.regex_group: + re_list += self.regex_group[search_group] + + return re_list + + def __init__(self, *args, **kwargs): + """ + Override init method to get verbose argument + """ + + self.regex_group = None + self.search_string = None + self.re_string = None + + if "search_string" in kwargs: + self.search_string = kwargs.get("search_string") + del kwargs["search_string"] + + if self.search_string: + + try: + self.re_string = re.compile(self.search_string) + except re.PatternError as e: + msg = f"{self.search_string} is not a valid regular expression" + raise ValueError(msg) from e + + super().__init__(*args, **kwargs) + + def parse_abi(self, *args, **kwargs): + + super().parse_abi(*args, **kwargs) + + self.regex_group = {} + + print("Converting ABI What fields into regexes...", file=sys.stderr) + + for t in sorted(self.data.items(), key=lambda x: x[0]): + v = t[1] + if v.get("type") == "File": + continue + + v["regex"] = [] + + for what in v.get("what", []): + if not what.startswith("/sys"): + continue + + new = what + for r, s in self.re_whats: + try: + new = r.sub(s, new) + except re.PatternError as e: + # Help debugging troubles with new regexes + raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e + + v["regex"].append(new) + + if self.debug & AbiDebug.REGEX: + self.log.debug("%-90s <== %s", new, what) + + # Store regex into a subgroup to speedup searches + self.regex_append(what, new) + + if self.debug & AbiDebug.SUBGROUP_DICT: + self.log.debug("%s", pformat(self.regex_group)) + + if self.debug & AbiDebug.SUBGROUP_SIZE: + biggestd_keys = sorted(self.regex_group.keys(), + key= lambda k: len(self.regex_group[k]), + reverse=True) + + print("Top regex subgroups:", file=sys.stderr) + for k in biggestd_keys[:10]: + print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr) diff --git a/tools/lib/python/abi/helpers.py b/tools/lib/python/abi/helpers.py new file mode 100644 index 000000000000..639b23e4ca33 --- /dev/null +++ b/tools/lib/python/abi/helpers.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# pylint: disable=R0903 +# SPDX-License-Identifier: GPL-2.0 + +""" +Helper classes for ABI parser +""" + +ABI_DIR = "Documentation/ABI/" + + +class AbiDebug: + """Debug levels""" + + WHAT_PARSING = 1 + WHAT_OPEN = 2 + DUMP_ABI_STRUCTS = 4 + UNDEFINED = 8 + REGEX = 16 + SUBGROUP_MAP = 32 + SUBGROUP_DICT = 64 + SUBGROUP_SIZE = 128 + GRAPH = 256 + + +DEBUG_HELP = """ +1 - enable debug parsing logic +2 - enable debug messages on file open +4 - enable debug for ABI parse data +8 - enable extra debug information to identify troubles + with ABI symbols found at the local machine that + weren't found on ABI documentation (used only for + undefined subcommand) +16 - enable debug for what to regex conversion +32 - enable debug for symbol regex subgroups +64 - enable debug for sysfs graph tree variable +""" diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py new file mode 100644 index 000000000000..f15c94a6e33c --- /dev/null +++ b/tools/lib/python/abi/system_symbols.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0912,R0914,R0915,R1702 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. +""" + +import os +import re +import sys + +from concurrent import futures +from datetime import datetime +from random import shuffle + +from helpers import AbiDebug + +class SystemSymbols: + """Stores arguments for the class and initialize class vars""" + + def graph_add_file(self, path, link=None): + """ + add a file path to the sysfs graph stored at self.root + """ + + if path in self.files: + return + + name = "" + ref = self.root + for edge in path.split("/"): + name += edge + "/" + if edge not in ref: + ref[edge] = {"__name": [name.rstrip("/")]} + + ref = ref[edge] + + if link and link not in ref["__name"]: + ref["__name"].append(link.rstrip("/")) + + self.files.add(path) + + def print_graph(self, root_prefix="", root=None, level=0): + """Prints a reference tree graph using UTF-8 characters""" + + if not root: + root = self.root + level = 0 + + # Prevent endless traverse + if level > 5: + return + + if level > 0: + prefix = "├──" + last_prefix = "└──" + else: + prefix = "" + last_prefix = "" + + items = list(root.items()) + + names = root.get("__name", []) + for k, edge in items: + if k == "__name": + continue + + if not k: + k = "/" + + if len(names) > 1: + k += " links: " + ",".join(names[1:]) + + if edge == items[-1][1]: + print(root_prefix + last_prefix + k) + p = root_prefix + if level > 0: + p += " " + self.print_graph(p, edge, level + 1) + else: + print(root_prefix + prefix + k) + p = root_prefix + "│ " + self.print_graph(p, edge, level + 1) + + def _walk(self, root): + """ + Walk through sysfs to get all devnodes that aren't ignored. + + By default, uses /sys as sysfs mounting point. If another + directory is used, it replaces them to /sys at the patches. + """ + + with os.scandir(root) as obj: + for entry in obj: + path = os.path.join(root, entry.name) + if self.sysfs: + p = path.replace(self.sysfs, "/sys", count=1) + else: + p = path + + if self.re_ignore.search(p): + return + + # Handle link first to avoid directory recursion + if entry.is_symlink(): + real = os.path.realpath(path) + if not self.sysfs: + self.aliases[path] = real + else: + real = real.replace(self.sysfs, "/sys", count=1) + + # Add absfile location to graph if it doesn't exist + if not self.re_ignore.search(real): + # Add link to the graph + self.graph_add_file(real, p) + + elif entry.is_file(): + self.graph_add_file(p) + + elif entry.is_dir(): + self._walk(path) + + def __init__(self, abi, sysfs="/sys", hints=False): + """ + Initialize internal variables and get a list of all files inside + sysfs that can currently be parsed. + + Please notice that there are several entries on sysfs that aren't + documented as ABI. Ignore those. + + The real paths will be stored under self.files. Aliases will be + stored in separate, as self.aliases. + """ + + self.abi = abi + self.log = abi.log + + if sysfs != "/sys": + self.sysfs = sysfs.rstrip("/") + else: + self.sysfs = None + + self.hints = hints + + self.root = {} + self.aliases = {} + self.files = set() + + dont_walk = [ + # Those require root access and aren't documented at ABI + f"^{sysfs}/kernel/debug", + f"^{sysfs}/kernel/tracing", + f"^{sysfs}/fs/pstore", + f"^{sysfs}/fs/bpf", + f"^{sysfs}/fs/fuse", + + # This is not documented at ABI + f"^{sysfs}/module", + + f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI + f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings + "sections|notes", # aren't actually part of ABI + + # kernel-parameters.txt - not easy to parse + "parameters", + ] + + self.re_ignore = re.compile("|".join(dont_walk)) + + print(f"Reading {sysfs} directory contents...", file=sys.stderr) + self._walk(sysfs) + + def check_file(self, refs, found): + """Check missing ABI symbols for a given sysfs file""" + + res_list = [] + + try: + for names in refs: + fname = names[0] + + res = { + "found": False, + "fname": fname, + "msg": "", + } + res_list.append(res) + + re_what = self.abi.get_regexes(fname) + if not re_what: + self.abi.log.warning(f"missing rules for {fname}") + continue + + for name in names: + for r in re_what: + if self.abi.debug & AbiDebug.UNDEFINED: + self.log.debug("check if %s matches '%s'", name, r.pattern) + if r.match(name): + res["found"] = True + if found: + res["msg"] += f" {fname}: regex:\n\t" + continue + + if self.hints and not res["found"]: + res["msg"] += f" {fname} not found. Tested regexes:\n" + for r in re_what: + res["msg"] += " " + r.pattern + "\n" + + except KeyboardInterrupt: + pass + + return res_list + + def _ref_interactor(self, root): + """Recursive function to interact over the sysfs tree""" + + for k, v in root.items(): + if isinstance(v, dict): + yield from self._ref_interactor(v) + + if root == self.root or k == "__name": + continue + + if self.abi.re_string: + fname = v["__name"][0] + if self.abi.re_string.search(fname): + yield v + else: + yield v + + + def get_fileref(self, all_refs, chunk_size): + """Interactor to group refs into chunks""" + + n = 0 + refs = [] + + for ref in all_refs: + refs.append(ref) + + n += 1 + if n >= chunk_size: + yield refs + n = 0 + refs = [] + + yield refs + + def check_undefined_symbols(self, max_workers=None, chunk_size=50, + found=None, dry_run=None): + """Seach ABI for sysfs symbols missing documentation""" + + self.abi.parse_abi() + + if self.abi.debug & AbiDebug.GRAPH: + self.print_graph() + + all_refs = [] + for ref in self._ref_interactor(self.root): + all_refs.append(ref["__name"]) + + if dry_run: + print("Would check", file=sys.stderr) + for ref in all_refs: + print(", ".join(ref)) + + return + + print("Starting to search symbols (it may take several minutes):", + file=sys.stderr) + start = datetime.now() + old_elapsed = None + + # Python doesn't support multithreading due to limitations on its + # global lock (GIL). While Python 3.13 finally made GIL optional, + # there are still issues related to it. Also, we want to have + # backward compatibility with older versions of Python. + # + # So, use instead multiprocess. However, Python is very slow passing + # data from/to multiple processes. Also, it may consume lots of memory + # if the data to be shared is not small. So, we need to group workload + # in chunks that are big enough to generate performance gains while + # not being so big that would cause out-of-memory. + + num_refs = len(all_refs) + print(f"Number of references to parse: {num_refs}", file=sys.stderr) + + if not max_workers: + max_workers = os.cpu_count() + elif max_workers > os.cpu_count(): + max_workers = os.cpu_count() + + max_workers = max(max_workers, 1) + + max_chunk_size = int((num_refs + max_workers - 1) / max_workers) + chunk_size = min(chunk_size, max_chunk_size) + chunk_size = max(1, chunk_size) + + if max_workers > 1: + executor = futures.ProcessPoolExecutor + + # Place references in a random order. This may help improving + # performance, by mixing complex/simple expressions when creating + # chunks + shuffle(all_refs) + else: + # Python has a high overhead with processes. When there's just + # one worker, it is faster to not create a new process. + # Yet, User still deserves to have a progress print. So, use + # python's "thread", which is actually a single process, using + # an internal schedule to switch between tasks. No performance + # gains for non-IO tasks, but still it can be quickly interrupted + # from time to time to display progress. + executor = futures.ThreadPoolExecutor + + not_found = [] + f_list = [] + with executor(max_workers=max_workers) as exe: + for refs in self.get_fileref(all_refs, chunk_size): + if refs: + try: + f_list.append(exe.submit(self.check_file, refs, found)) + + except KeyboardInterrupt: + return + + total = len(f_list) + + if not total: + if self.abi.re_string: + print(f"No ABI symbol matches {self.abi.search_string}") + else: + self.abi.log.warning("No ABI symbols found") + return + + print(f"{len(f_list):6d} jobs queued on {max_workers} workers", + file=sys.stderr) + + while f_list: + try: + t = futures.wait(f_list, timeout=1, + return_when=futures.FIRST_COMPLETED) + + done = t[0] + + for fut in done: + res_list = fut.result() + + for res in res_list: + if not res["found"]: + not_found.append(res["fname"]) + if res["msg"]: + print(res["msg"]) + + f_list.remove(fut) + except KeyboardInterrupt: + return + + except RuntimeError as e: + self.abi.log.warning(f"Future: {e}") + break + + if sys.stderr.isatty(): + elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] + if len(f_list) < total: + elapsed += f" ({total - len(f_list)}/{total} jobs completed). " + if elapsed != old_elapsed: + print(elapsed + "\r", end="", flush=True, + file=sys.stderr) + old_elapsed = elapsed + + elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] + print(elapsed, file=sys.stderr) + + for f in sorted(not_found): + print(f"{f} not found.") diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py new file mode 100755 index 000000000000..a24f30ef4fa8 --- /dev/null +++ b/tools/lib/python/jobserver.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0+ +# +# pylint: disable=C0103,C0209 +# +# + +""" +Interacts with the POSIX jobserver during the Kernel build time. + +A "normal" jobserver task, like the one initiated by a make subrocess would do: + + - open read/write file descriptors to communicate with the job server; + - ask for one slot by calling: + claim = os.read(reader, 1) + - when the job finshes, call: + os.write(writer, b"+") # os.write(writer, claim) + +Here, the goal is different: This script aims to get the remaining number +of slots available, using all of them to run a command which handle tasks in +parallel. To to that, it has a loop that ends only after there are no +slots left. It then increments the number by one, in order to allow a +call equivalent to make -j$((claim+1)), e.g. having a parent make creating +$claim child to do the actual work. + +The end goal here is to keep the total number of build tasks under the +limit established by the initial make -j$n_proc call. + +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" + +import errno +import os +import subprocess +import sys + +class JobserverExec: + """ + Claim all slots from make using POSIX Jobserver. + + The main methods here are: + - open(): reserves all slots; + - close(): method returns all used slots back to make; + - run(): executes a command setting PARALLELISM= + """ + + def __init__(self): + """Initialize internal vars""" + self.claim = 0 + self.jobs = b"" + self.reader = None + self.writer = None + self.is_open = False + + def open(self): + """Reserve all available slots to be claimed later on""" + + if self.is_open: + return + + try: + # Fetch the make environment options. + flags = os.environ["MAKEFLAGS"] + # Look for "--jobserver=R,W" + # Note that GNU Make has used --jobserver-fds and --jobserver-auth + # so this handles all of them. + opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] + + # Parse out R,W file descriptor numbers and set them nonblocking. + # If the MAKEFLAGS variable contains multiple instances of the + # --jobserver-auth= option, the last one is relevant. + fds = opts[-1].split("=", 1)[1] + + # Starting with GNU Make 4.4, named pipes are used for reader + # and writer. + # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 + _, _, path = fds.partition("fifo:") + + if path: + self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) + self.writer = os.open(path, os.O_WRONLY) + else: + self.reader, self.writer = [int(x) for x in fds.split(",", 1)] + # Open a private copy of reader to avoid setting nonblocking + # on an unexpecting process with the same reader fd. + self.reader = os.open("/proc/self/fd/%d" % (self.reader), + os.O_RDONLY | os.O_NONBLOCK) + + # Read out as many jobserver slots as possible + while True: + try: + slot = os.read(self.reader, 8) + self.jobs += slot + except (OSError, IOError) as e: + if e.errno == errno.EWOULDBLOCK: + # Stop at the end of the jobserver queue. + break + # If something went wrong, give back the jobs. + if self.jobs: + os.write(self.writer, self.jobs) + raise e + + # Add a bump for our caller's reserveration, since we're just going + # to sit here blocked on our child. + self.claim = len(self.jobs) + 1 + + except (KeyError, IndexError, ValueError, OSError, IOError): + # Any missing environment strings or bad fds should result in just + # not being parallel. + self.claim = None + + self.is_open = True + + def close(self): + """Return all reserved slots to Jobserver""" + + if not self.is_open: + return + + # Return all the reserved slots. + if len(self.jobs): + os.write(self.writer, self.jobs) + + self.is_open = False + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.close() + + def run(self, cmd, *args, **pwargs): + """ + Run a command setting PARALLELISM env variable to the number of + available job slots (claim) + 1, e.g. it will reserve claim slots + to do the actual build work, plus one to monitor its children. + """ + self.open() # Ensure that self.claim is set + + # We can only claim parallelism if there was a jobserver (i.e. a + # top-level "-jN" argument) and there were no other failures. Otherwise + # leave out the environment variable and let the child figure out what + # is best. + if self.claim: + os.environ["PARALLELISM"] = str(self.claim) + + return subprocess.call(cmd, *args, **pwargs) diff --git a/tools/lib/python/kdoc/enrich_formatter.py b/tools/lib/python/kdoc/enrich_formatter.py new file mode 100644 index 000000000000..bb171567a4ca --- /dev/null +++ b/tools/lib/python/kdoc/enrich_formatter.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2025 by Mauro Carvalho Chehab . + +""" +Ancillary argparse HelpFormatter class that works on a similar way as +argparse.RawDescriptionHelpFormatter, e.g. description maintains line +breaks, but it also implement transformations to the help text. The +actual transformations ar given by enrich_text(), if the output is tty. + +Currently, the follow transformations are done: + + - Positional arguments are shown in upper cases; + - if output is TTY, ``var`` and positional arguments are shown prepended + by an ANSI SGR code. This is usually translated to bold. On some + terminals, like, konsole, this is translated into a colored bold text. +""" + +import argparse +import re +import sys + +class EnrichFormatter(argparse.HelpFormatter): + """ + Better format the output, making easier to identify the positional args + and how they're used at the __doc__ description. + """ + def __init__(self, *args, **kwargs): + """Initialize class and check if is TTY""" + super().__init__(*args, **kwargs) + self._tty = sys.stdout.isatty() + + def enrich_text(self, text): + """Handle ReST markups (currently, only ``foo``)""" + if self._tty and text: + # Replace ``text`` with ANSI SGR (bold) + return re.sub(r'\`\`(.+?)\`\`', + lambda m: f'\033[1m{m.group(1)}\033[0m', text) + return text + + def _fill_text(self, text, width, indent): + """Enrich descriptions with markups on it""" + enriched = self.enrich_text(text) + return "\n".join(indent + line for line in enriched.splitlines()) + + def _format_usage(self, usage, actions, groups, prefix): + """Enrich positional arguments at usage: line""" + + prog = self._prog + parts = [] + + for action in actions: + if action.option_strings: + opt = action.option_strings[0] + if action.nargs != 0: + opt += f" {action.dest.upper()}" + parts.append(f"[{opt}]") + else: + # Positional argument + parts.append(self.enrich_text(f"``{action.dest.upper()}``")) + + usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n" + return usage_text + + def _format_action_invocation(self, action): + """Enrich argument names""" + if not action.option_strings: + return self.enrich_text(f"``{action.dest.upper()}``") + + return ", ".join(action.option_strings) diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py new file mode 100644 index 000000000000..1fd8d17edb32 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=R0903,R0913,R0914,R0917 + +""" +Parse lernel-doc tags on multiple kernel source files. +""" + +import argparse +import logging +import os +import re + +from kdoc_parser import KernelDoc +from kdoc_output import OutputFormat + + +class GlobSourceFiles: + """ + Parse C source code file names and directories via an Interactor. + """ + + def __init__(self, srctree=None, valid_extensions=None): + """ + Initialize valid extensions with a tuple. + + If not defined, assume default C extensions (.c and .h) + + It would be possible to use python's glob function, but it is + very slow, and it is not interactive. So, it would wait to read all + directories before actually do something. + + So, let's use our own implementation. + """ + + if not valid_extensions: + self.extensions = (".c", ".h") + else: + self.extensions = valid_extensions + + self.srctree = srctree + + def _parse_dir(self, dirname): + """Internal function to parse files recursively""" + + with os.scandir(dirname) as obj: + for entry in obj: + name = os.path.join(dirname, entry.name) + + if entry.is_dir(follow_symlinks=False): + yield from self._parse_dir(name) + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if not basename.endswith(self.extensions): + continue + + yield name + + def parse_files(self, file_list, file_not_found_cb): + """ + Define an interator to parse all source files from file_list, + handling directories if any + """ + + if not file_list: + return + + for fname in file_list: + if self.srctree: + f = os.path.join(self.srctree, fname) + else: + f = fname + + if os.path.isdir(f): + yield from self._parse_dir(f) + elif os.path.isfile(f): + yield f + elif file_not_found_cb: + file_not_found_cb(fname) + + +class KernelFiles(): + """ + Parse kernel-doc tags on multiple kernel source files. + + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and + EXPORT_SYMBOL* macros; + - self.process_export_file(): parses only EXPORT_SYMBOL* macros. + """ + + def warning(self, msg): + """Ancillary routine to output a warning and increment error count""" + + self.config.log.warning(msg) + self.errors += 1 + + def error(self, msg): + """Ancillary routine to output an error and increment error count""" + + self.config.log.error(msg) + self.errors += 1 + + def parse_file(self, fname): + """ + Parse a single Kernel source. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.files: + return + + doc = KernelDoc(self.config, fname) + export_table, entries = doc.parse_kdoc() + + self.export_table[fname] = export_table + + self.files.add(fname) + self.export_files.add(fname) # parse_kdoc() already check exports + + self.results[fname] = entries + + def process_export_file(self, fname): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.export_files: + return + + doc = KernelDoc(self.config, fname) + export_table = doc.parse_export() + + if not export_table: + self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") + export_table = set() + + self.export_table[fname] = export_table + self.export_files.add(fname) + + def file_not_found_cb(self, fname): + """ + Callback to warn if a file was not found. + """ + + self.error(f"Cannot find file {fname}") + + def __init__(self, verbose=False, out_style=None, + werror=False, wreturn=False, wshort_desc=False, + wcontents_before_sections=False, + logger=None): + """ + Initialize startup variables and parse all files + """ + + if not verbose: + verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) + + if out_style is None: + out_style = OutputFormat() + + if not werror: + kcflags = os.environ.get("KCFLAGS", None) + if kcflags: + match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) + if match: + werror = True + + # reading this variable is for backwards compat just in case + # someone was calling it with the variable from outside the + # kernel's build system + kdoc_werror = os.environ.get("KDOC_WERROR", None) + if kdoc_werror: + werror = kdoc_werror + + # Some variables are global to the parser logic as a whole as they are + # used to send control configuration to KernelDoc class. As such, + # those variables are read-only inside the KernelDoc. + self.config = argparse.Namespace + + self.config.verbose = verbose + self.config.werror = werror + self.config.wreturn = wreturn + self.config.wshort_desc = wshort_desc + self.config.wcontents_before_sections = wcontents_before_sections + + if not logger: + self.config.log = logging.getLogger("kernel-doc") + else: + self.config.log = logger + + self.config.warning = self.warning + + self.config.src_tree = os.environ.get("SRCTREE", None) + + # Initialize variables that are internal to KernelFiles + + self.out_style = out_style + + self.errors = 0 + self.results = {} + + self.files = set() + self.export_files = set() + self.export_table = {} + + def parse(self, file_list, export_file=None): + """ + Parse all files + """ + + glob = GlobSourceFiles(srctree=self.config.src_tree) + + for fname in glob.parse_files(file_list, self.file_not_found_cb): + self.parse_file(fname) + + for fname in glob.parse_files(export_file, self.file_not_found_cb): + self.process_export_file(fname) + + def out_msg(self, fname, name, arg): + """ + Return output messages from a file name using the output style + filtering. + + If output type was not handled by the syler, return None. + """ + + # NOTE: we can add rules here to filter out unwanted parts, + # although OutputFormat.msg already does that. + + return self.out_style.msg(fname, name, arg) + + def msg(self, enable_lineno=False, export=False, internal=False, + symbol=None, nosymbol=None, no_doc_sections=False, + filenames=None, export_file=None): + """ + Interacts over the kernel-doc results and output messages, + returning kernel-doc markups on each interaction + """ + + self.out_style.set_config(self.config) + + if not filenames: + filenames = sorted(self.results.keys()) + + glob = GlobSourceFiles(srctree=self.config.src_tree) + + for fname in filenames: + function_table = set() + + if internal or export: + if not export_file: + export_file = [fname] + + for f in glob.parse_files(export_file, self.file_not_found_cb): + function_table |= self.export_table[f] + + if symbol: + for s in symbol: + function_table.add(s) + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + + msg = "" + if fname not in self.results: + self.config.log.warning("No kernel-doc for file %s", fname) + continue + + symbols = self.results[fname] + self.out_style.set_symbols(symbols) + + for arg in symbols: + m = self.out_msg(fname, arg.name, arg) + + if m is None: + ln = arg.get("ln", 0) + dtype = arg.get('type', "") + + self.config.log.warning("%s:%d Can't handle %s", + fname, ln, dtype) + else: + msg += m + + if msg: + yield fname, msg diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py new file mode 100644 index 000000000000..19805301cb2c --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# A class that will, eventually, encapsulate all of the parsed data that we +# then pass into the output modules. +# + +class KdocItem: + def __init__(self, name, fname, type, start_line, **other_stuff): + self.name = name + self.fname = fname + self.type = type + self.declaration_start_line = start_line + self.sections = {} + self.sections_start_lines = {} + self.parameterlist = [] + self.parameterdesc_start_lines = [] + self.parameterdescs = {} + self.parametertypes = {} + # + # Just save everything else into our own dict so that the output + # side can grab it directly as before. As we move things into more + # structured data, this will, hopefully, fade away. + # + self.other_stuff = other_stuff + + def get(self, key, default = None): + return self.other_stuff.get(key, default) + + def __getitem__(self, key): + return self.get(key) + + # + # Tracking of section and parameter information. + # + def set_sections(self, sections, start_lines): + self.sections = sections + self.section_start_lines = start_lines + + def set_params(self, names, descs, types, starts): + self.parameterlist = names + self.parameterdescs = descs + self.parametertypes = types + self.parameterdesc_start_lines = starts diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py new file mode 100644 index 000000000000..58f115059e93 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -0,0 +1,824 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 + +""" +Implement output filters to print kernel-doc documentation. + +The implementation uses a virtual base class (OutputFormat) which +contains a dispatches to virtual methods, and some code to filter +out output messages. + +The actual implementation is done on one separate class per each type +of output. Currently, there are output classes for ReST and man/troff. +""" + +import os +import re +from datetime import datetime + +from kdoc_parser import KernelDoc, type_param +from kdoc_re import KernRe + + +function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) + +# match expressions used to find embedded type information +type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) +type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) +type_func = KernRe(r"(\w+)\(\)", cache=False) +type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# Special RST handling for func ptr params +type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) + +# Special RST handling for structs with func ptr params +type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) + +type_env = KernRe(r"(\$\w+)", cache=False) +type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) +type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) +type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) +type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False) +type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) +type_fallback = KernRe(r"\&([_\w]+)", cache=False) +type_member_func = type_member + KernRe(r"\(\)", cache=False) + + +class OutputFormat: + """ + Base class for OutputFormat. If used as-is, it means that only + warnings will be displayed. + """ + + # output mode. + OUTPUT_ALL = 0 # output all symbols and doc sections + OUTPUT_INCLUDE = 1 # output only specified symbols + OUTPUT_EXPORTED = 2 # output exported symbols + OUTPUT_INTERNAL = 3 # output non-exported symbols + + # Virtual member to be overriden at the inherited classes + highlights = [] + + def __init__(self): + """Declare internal vars and set mode to OUTPUT_ALL""" + + self.out_mode = self.OUTPUT_ALL + self.enable_lineno = None + self.nosymbol = {} + self.symbol = None + self.function_table = None + self.config = None + self.no_doc_sections = False + + self.data = "" + + def set_config(self, config): + """ + Setup global config variables used by both parser and output. + """ + + self.config = config + + def set_filter(self, export, internal, symbol, nosymbol, function_table, + enable_lineno, no_doc_sections): + """ + Initialize filter variables according with the requested mode. + + Only one choice is valid between export, internal and symbol. + + The nosymbol filter can be used on all modes. + """ + + self.enable_lineno = enable_lineno + self.no_doc_sections = no_doc_sections + self.function_table = function_table + + if symbol: + self.out_mode = self.OUTPUT_INCLUDE + elif export: + self.out_mode = self.OUTPUT_EXPORTED + elif internal: + self.out_mode = self.OUTPUT_INTERNAL + else: + self.out_mode = self.OUTPUT_ALL + + if nosymbol: + self.nosymbol = set(nosymbol) + + + def highlight_block(self, block): + """ + Apply the RST highlights to a sub-block of text. + """ + + for r, sub in self.highlights: + block = r.sub(sub, block) + + return block + + def out_warnings(self, args): + """ + Output warnings for identifiers that will be displayed. + """ + + for log_msg in args.warnings: + self.config.warning(log_msg) + + def check_doc(self, name, args): + """Check if DOC should be output""" + + if self.no_doc_sections: + return False + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode == self.OUTPUT_INCLUDE: + if name in self.function_table: + self.out_warnings(args) + return True + + return False + + def check_declaration(self, dtype, name, args): + """ + Checks if a declaration should be output or not based on the + filtering criteria. + """ + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: + if name in self.function_table: + return True + + if self.out_mode == self.OUTPUT_INTERNAL: + if dtype != "function": + self.out_warnings(args) + return True + + if name not in self.function_table: + self.out_warnings(args) + return True + + return False + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser + """ + + self.data = "" + + dtype = args.type + + if dtype == "doc": + self.out_doc(fname, name, args) + return self.data + + if not self.check_declaration(dtype, name, args): + return self.data + + if dtype == "function": + self.out_function(fname, name, args) + return self.data + + if dtype == "enum": + self.out_enum(fname, name, args) + return self.data + + if dtype == "typedef": + self.out_typedef(fname, name, args) + return self.data + + if dtype in ["struct", "union"]: + self.out_struct(fname, name, args) + return self.data + + # Warn if some type requires an output logic + self.config.log.warning("doesn't now how to output '%s' block", + dtype) + + return None + + # Virtual methods to be overridden by inherited classes + # At the base class, those do nothing. + def set_symbols(self, symbols): + """Get a list of all symbols from kernel_doc""" + + def out_doc(self, fname, name, args): + """Outputs a DOC block""" + + def out_function(self, fname, name, args): + """Outputs a function""" + + def out_enum(self, fname, name, args): + """Outputs an enum""" + + def out_typedef(self, fname, name, args): + """Outputs a typedef""" + + def out_struct(self, fname, name, args): + """Outputs a struct""" + + +class RestFormat(OutputFormat): + """Consts and functions used by ReST output""" + + highlights = [ + (type_constant, r"``\1``"), + (type_constant2, r"``\1``"), + + # Note: need to escape () to avoid func matching later + (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), + (type_member, r":c:type:`\1\2\3 <\1>`"), + (type_fp_param, r"**\1\\(\\)**"), + (type_fp_param2, r"**\1\\(\\)**"), + (type_func, r"\1()"), + (type_enum, r":c:type:`\1 <\2>`"), + (type_struct, r":c:type:`\1 <\2>`"), + (type_typedef, r":c:type:`\1 <\2>`"), + (type_union, r":c:type:`\1 <\2>`"), + + # in rst this can refer to any type + (type_fallback, r":c:type:`\1`"), + (type_param_ref, r"**\1\2**") + ] + blankline = "\n" + + sphinx_literal = KernRe(r'^[^.].*::$', cache=False) + sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) + + def __init__(self): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.lineprefix = "" + + def print_lineno(self, ln): + """Outputs a line number""" + + if self.enable_lineno and ln is not None: + ln += 1 + self.data += f".. LINENO {ln}\n" + + def output_highlight(self, args): + """ + Outputs a C symbol that may require being converted to ReST using + the self.highlights variable + """ + + input_text = args + output = "" + in_literal = False + litprefix = "" + block = "" + + for line in input_text.strip("\n").split("\n"): + + # If we're in a literal block, see if we should drop out of it. + # Otherwise, pass the line straight through unmunged. + if in_literal: + if line.strip(): # If the line is not blank + # If this is the first non-blank line in a literal block, + # figure out the proper indent. + if not litprefix: + r = KernRe(r'^(\s*)') + if r.match(line): + litprefix = '^' + r.group(1) + else: + litprefix = "" + + output += line + "\n" + elif not KernRe(litprefix).match(line): + in_literal = False + else: + output += line + "\n" + else: + output += line + "\n" + + # Not in a literal block (or just dropped out) + if not in_literal: + block += line + "\n" + if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line): + in_literal = True + litprefix = "" + output += self.highlight_block(block) + block = "" + + # Handle any remaining block + if block: + output += self.highlight_block(block) + + # Print the output with the line prefix + for line in output.strip("\n").split("\n"): + self.data += self.lineprefix + line + "\n" + + def out_section(self, args, out_docblock=False): + """ + Outputs a block section. + + This could use some work; it's used to output the DOC: sections, and + starts by putting out the name of the doc section itself, but that + tends to duplicate a header already in the template file. + """ + for section, text in args.sections.items(): + # Skip sections that are in the nosymbol_table + if section in self.nosymbol: + continue + + if out_docblock: + if not self.out_mode == self.OUTPUT_INCLUDE: + self.data += f".. _{section}:\n\n" + self.data += f'{self.lineprefix}**{section}**\n\n' + else: + self.data += f'{self.lineprefix}**{section}**\n\n' + + self.print_lineno(args.section_start_lines.get(section, 0)) + self.output_highlight(text) + self.data += "\n" + self.data += "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + self.out_section(args, out_docblock=True) + + def out_function(self, fname, name, args): + + oldprefix = self.lineprefix + signature = "" + + func_macro = args.get('func_macro', False) + if func_macro: + signature = name + else: + if args.get('functiontype'): + signature = args['functiontype'] + " " + signature += name + " (" + + ln = args.declaration_start_line + count = 0 + for parameter in args.parameterlist: + if count != 0: + signature += ", " + count += 1 + dtype = args.parametertypes.get(parameter, "") + + if function_pointer.search(dtype): + signature += function_pointer.group(1) + parameter + function_pointer.group(3) + else: + signature += dtype + + if not func_macro: + signature += ")" + + self.print_lineno(ln) + if args.get('typedef') or not args.get('functiontype'): + self.data += f".. c:macro:: {name}\n\n" + + if args.get('typedef'): + self.data += " **Typedef**: " + self.lineprefix = "" + self.output_highlight(args.get('purpose', "")) + self.data += "\n\n**Syntax**\n\n" + self.data += f" ``{signature}``\n\n" + else: + self.data += f"``{signature}``\n\n" + else: + self.data += f".. c:function:: {signature}\n\n" + + if not args.get('typedef'): + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', "")) + self.data += "\n" + + # Put descriptive text into a container (HTML
) to help set + # function prototypes apart + self.lineprefix = " " + + if args.parameterlist: + self.data += ".. container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Parameters**\n\n" + + for parameter in args.parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + dtype = args.parametertypes.get(parameter, "") + + if dtype: + self.data += f"{self.lineprefix}``{dtype}``\n" + else: + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) + + self.lineprefix = " " + if parameter_name in args.parameterdescs and \ + args.parameterdescs[parameter_name] != KernelDoc.undescribed: + + self.output_highlight(args.parameterdescs[parameter_name]) + self.data += "\n" + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.lineprefix = " " + + self.out_section(args) + self.lineprefix = oldprefix + + def out_enum(self, fname, name, args): + + oldprefix = self.lineprefix + ln = args.declaration_start_line + + self.data += f"\n\n.. c:enum:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', '')) + self.data += "\n" + + self.data += ".. container:: kernelindent\n\n" + outer = self.lineprefix + " " + self.lineprefix = outer + " " + self.data += f"{outer}**Constants**\n\n" + + for parameter in args.parameterlist: + self.data += f"{outer}``{parameter}``\n" + + if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed: + self.output_highlight(args.parameterdescs[parameter]) + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_typedef(self, fname, name, args): + + oldprefix = self.lineprefix + ln = args.declaration_start_line + + self.data += f"\n\n.. c:type:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + + self.output_highlight(args.get('purpose', '')) + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_struct(self, fname, name, args): + + purpose = args.get('purpose', "") + declaration = args.get('definition', "") + dtype = args.type + ln = args.declaration_start_line + + self.data += f"\n\n.. c:{dtype}:: {name}\n\n" + + self.print_lineno(ln) + + oldprefix = self.lineprefix + self.lineprefix += " " + + self.output_highlight(purpose) + self.data += "\n" + + self.data += ".. container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Definition**::\n\n" + + self.lineprefix = self.lineprefix + " " + + declaration = declaration.replace("\t", self.lineprefix) + + self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n" + self.data += f"{declaration}{self.lineprefix}" + "};\n\n" + + self.lineprefix = " " + self.data += f"{self.lineprefix}**Members**\n\n" + for parameter in args.parameterlist: + if not parameter or parameter.startswith("#"): + continue + + parameter_name = parameter.split("[", maxsplit=1)[0] + + if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) + + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.lineprefix = " " + self.output_highlight(args.parameterdescs[parameter_name]) + self.lineprefix = " " + + self.data += "\n" + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + +class ManFormat(OutputFormat): + """Consts and functions used by man pages output""" + + highlights = ( + (type_constant, r"\1"), + (type_constant2, r"\1"), + (type_func, r"\\fB\1\\fP"), + (type_enum, r"\\fI\1\\fP"), + (type_struct, r"\\fI\1\\fP"), + (type_typedef, r"\\fI\1\\fP"), + (type_union, r"\\fI\1\\fP"), + (type_param, r"\\fI\1\\fP"), + (type_param_ref, r"\\fI\1\2\\fP"), + (type_member, r"\\fI\1\2\3\\fP"), + (type_fallback, r"\\fI\1\\fP") + ) + blankline = "" + + date_formats = [ + "%a %b %d %H:%M:%S %Z %Y", + "%a %b %d %H:%M:%S %Y", + "%Y-%m-%d", + "%b %d %Y", + "%B %d %Y", + "%m %d %Y", + ] + + def __init__(self, modulename): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.modulename = modulename + self.symbols = [] + + dt = None + tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") + if tstamp: + for fmt in self.date_formats: + try: + dt = datetime.strptime(tstamp, fmt) + break + except ValueError: + pass + + if not dt: + dt = datetime.now() + + self.man_date = dt.strftime("%B %Y") + + def arg_name(self, args, name): + """ + Return the name that will be used for the man page. + + As we may have the same name on different namespaces, + prepend the data type for all types except functions and typedefs. + + The doc section is special: it uses the modulename. + """ + + dtype = args.type + + if dtype == "doc": + return self.modulename + + if dtype in ["function", "typedef"]: + return name + + return f"{dtype} {name}" + + def set_symbols(self, symbols): + """ + Get a list of all symbols from kernel_doc. + + Man pages will uses it to add a SEE ALSO section with other + symbols at the same file. + """ + self.symbols = symbols + + def out_tail(self, fname, name, args): + """Adds a tail for all man pages""" + + # SEE ALSO section + self.data += f'.SH "SEE ALSO"' + "\n.PP\n" + self.data += (f"Kernel file \\fB{args.fname}\\fR\n") + if len(self.symbols) >= 2: + cur_name = self.arg_name(args, name) + + related = [] + for arg in self.symbols: + out_name = self.arg_name(arg, arg.name) + + if cur_name == out_name: + continue + + related.append(f"\\fB{out_name}\\fR(9)") + + self.data += ",\n".join(related) + "\n" + + # TODO: does it make sense to add other sections? Maybe + # REPORTING ISSUES? LICENSE? + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser. + + Add a tail at the end of man pages output. + """ + super().msg(fname, name, args) + self.out_tail(fname, name, args) + + return self.data + + def output_highlight(self, block): + """ + Outputs a C symbol that may require being highlighted with + self.highlights variable using troff syntax + """ + + contents = self.highlight_block(block) + + if isinstance(contents, list): + contents = "\n".join(contents) + + for line in contents.strip("\n").split("\n"): + line = KernRe(r"^\s*").sub("", line) + if not line: + continue + + if line[0] == ".": + self.data += "\\&" + line + "\n" + else: + self.data += line + "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_function(self, fname, name, args): + """output function in man""" + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + if args.get('functiontype', ''): + self.data += f'.B "{args["functiontype"]}" {name}' + "\n" + else: + self.data += f'.B "{name}' + "\n" + + count = 0 + parenth = "(" + post = "," + + for parameter in args.parameterlist: + if count == len(args.parameterlist) - 1: + post = ");" + + dtype = args.parametertypes.get(parameter, "") + if function_pointer.match(dtype): + # Pointer-to-function + self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" + else: + dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) + + self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" + count += 1 + parenth = "" + + if args.parameterlist: + self.data += ".SH ARGUMENTS\n" + + for parameter in args.parameterlist: + parameter_name = re.sub(r'\[.*', '', parameter) + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section.upper()}"' + "\n" + self.output_highlight(text) + + def out_enum(self, fname, name, args): + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"enum {name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + self.data += f"enum {name}" + " {\n" + + count = 0 + for parameter in args.parameterlist: + self.data += f'.br\n.BI " {parameter}"' + "\n" + if count == len(args.parameterlist) - 1: + self.data += "\n};\n" + else: + self.data += ", \n.br\n" + + count += 1 + + self.data += ".SH Constants\n" + + for parameter in args.parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_typedef(self, fname, name, args): + module = self.modulename + purpose = args.get('purpose') + out_name = self.arg_name(args, name) + + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"typedef {name} \\- {purpose}\n" + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_struct(self, fname, name, args): + module = self.modulename + purpose = args.get('purpose') + definition = args.get('definition') + out_name = self.arg_name(args, name) + + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{args.type} {name} \\- {purpose}\n" + + # Replace tabs with two spaces and handle newlines + declaration = definition.replace("\t", " ") + declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) + + self.data += ".SH SYNOPSIS\n" + self.data += f"{args.type} {name} " + "{" + "\n.br\n" + self.data += f'.BI "{declaration}\n' + "};\n.br\n\n" + + self.data += ".SH Members\n" + for parameter in args.parameterlist: + if parameter.startswith("#"): + continue + + parameter_name = re.sub(r"\[.*", "", parameter) + + if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name)) + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py new file mode 100644 index 000000000000..f7dbb0868367 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -0,0 +1,1667 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 + +""" +kdoc_parser +=========== + +Read a C language source or header FILE and extract embedded +documentation comments +""" + +import sys +import re +from pprint import pformat + +from kdoc_re import NestedMatch, KernRe +from kdoc_item import KdocItem + +# +# Regular expressions used to parse kernel-doc markups at KernelDoc class. +# +# Let's declare them in lowercase outside any class to make easier to +# convert from the python script. +# +# As those are evaluated at the beginning, no need to cache them +# + +# Allow whitespace at end of comment start. +doc_start = KernRe(r'^/\*\*\s*$', cache=False) + +doc_end = KernRe(r'\*/', cache=False) +doc_com = KernRe(r'\s*\*\s*', cache=False) +doc_com_body = KernRe(r'\s*\* ?', cache=False) +doc_decl = doc_com + KernRe(r'(\w+)', cache=False) + +# @params and a strictly limited set of supported section names +# Specifically: +# Match @word: +# @...: +# @{section-name}: +# while trying to not match literal block starts like "example::" +# +known_section_names = 'description|context|returns?|notes?|examples?' +known_sections = KernRe(known_section_names, flags = re.I) +doc_sect = doc_com + \ + KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', + flags=re.I, cache=False) + +doc_content = doc_com_body + KernRe(r'(.*)', cache=False) +doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) + +export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) + +type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# +# Tests for the beginning of a kerneldoc block in its various forms. +# +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) +doc_begin_func = KernRe(str(doc_com) + # initial " * ' + r"(?:\w+\s*\*\s*)?" + # type (not captured) + r'(?:define\s+)?' + # possible "define" (not captured) + r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" + r'(?:[-:].*)?$', # description (not captured) + cache = False) + +# +# Here begins a long set of transformations to turn structure member prefixes +# and macro invocations into something we can parse and generate kdoc for. +# +struct_args_pattern = r'([^,)]+)' + +struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end limiter matching +# the start delimiter. Yet, right now, only one replace group +# is allowed. +# +struct_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + +# +# Transforms for function prototypes +# +function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), +] + +# +# Apply a set of transforms to a block of text. +# +def apply_transforms(xforms, text): + for search, subst in xforms: + text = search.sub(subst, text) + return text + +# +# A little helper to get rid of excess white space +# +multi_space = KernRe(r'\s\s+') +def trim_whitespace(s): + return multi_space.sub(' ', s.strip()) + +# +# Remove struct/enum members that have been marked "private". +# +def trim_private_members(text): + # + # First look for a "public:" block that ends a private region, then + # handle the "private until the end" case. + # + text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) + text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) + # + # We needed the comments to do the above, but now we can take them out. + # + return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + +class state: + """ + State machine enums + """ + + # Parser states + NORMAL = 0 # normal code + NAME = 1 # looking for function name + DECLARATION = 2 # We have seen a declaration which might not be done + BODY = 3 # the body of the comment + SPECIAL_SECTION = 4 # doc section ending with a blank line + PROTO = 5 # scanning prototype + DOCBLOCK = 6 # documentation block + INLINE_NAME = 7 # gathering doc outside main block + INLINE_TEXT = 8 # reading the body of inline docs + + name = [ + "NORMAL", + "NAME", + "DECLARATION", + "BODY", + "SPECIAL_SECTION", + "PROTO", + "DOCBLOCK", + "INLINE_NAME", + "INLINE_TEXT", + ] + + +SECTION_DEFAULT = "Description" # default section + +class KernelEntry: + + def __init__(self, config, fname, ln): + self.config = config + self.fname = fname + + self._contents = [] + self.prototype = "" + + self.warnings = [] + + self.parameterlist = [] + self.parameterdescs = {} + self.parametertypes = {} + self.parameterdesc_start_lines = {} + + self.section_start_lines = {} + self.sections = {} + + self.anon_struct_union = False + + self.leading_space = None + + self.fname = fname + + # State flags + self.brcount = 0 + self.declaration_start_line = ln + 1 + + # + # Management of section contents + # + def add_text(self, text): + self._contents.append(text) + + def contents(self): + return '\n'.join(self._contents) + '\n' + + # TODO: rename to emit_message after removal of kernel-doc.pl + def emit_msg(self, ln, msg, *, warning=True): + """Emit a message""" + + log_msg = f"{self.fname}:{ln} {msg}" + + if not warning: + self.config.log.info(log_msg) + return + + # Delegate warning output to output logic, as this way it + # will report warnings/info only for symbols that are output + + self.warnings.append(log_msg) + return + + # + # Begin a new section. + # + def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): + if dump: + self.dump_section(start_new = True) + self.section = title + self.new_start_line = line_no + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + # + # If we have accumulated no contents in the default ("description") + # section, don't bother. + # + if self.section == SECTION_DEFAULT and not self._contents: + return + name = self.section + contents = self.contents() + + if type_param.match(name): + name = type_param.group(1) + + self.parameterdescs[name] = contents + self.parameterdesc_start_lines[name] = self.new_start_line + + self.new_start_line = 0 + + else: + if name in self.sections and self.sections[name] != "": + # Only warn on user-specified duplicate section names + if name != SECTION_DEFAULT: + self.emit_msg(self.new_start_line, + f"duplicate section name '{name}'") + # Treat as a new paragraph - add a blank line + self.sections[name] += '\n' + contents + else: + self.sections[name] = contents + self.section_start_lines[name] = self.new_start_line + self.new_start_line = 0 + +# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) + + if start_new: + self.section = SECTION_DEFAULT + self._contents = [] + +python_warning = False + +class KernelDoc: + """ + Read a C language source or header FILE and extract embedded + documentation comments. + """ + + # Section names + + section_context = "Context" + section_return = "Return" + + undescribed = "-- undescribed --" + + def __init__(self, config, fname): + """Initialize internal variables""" + + self.fname = fname + self.config = config + + # Initial state for the state machines + self.state = state.NORMAL + + # Store entry currently being processed + self.entry = None + + # Place all potential outputs into an array + self.entries = [] + + # + # We need Python 3.7 for its "dicts remember the insertion + # order" guarantee + # + global python_warning + if (not python_warning and + sys.version_info.major == 3 and sys.version_info.minor < 7): + + self.emit_msg(0, + 'Python 3.7 or later is required for correct results') + python_warning = True + + def emit_msg(self, ln, msg, *, warning=True): + """Emit a message""" + + if self.entry: + self.entry.emit_msg(ln, msg, warning=warning) + return + + log_msg = f"{self.fname}:{ln} {msg}" + + if warning: + self.config.log.warning(log_msg) + else: + self.config.log.info(log_msg) + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + + if self.entry: + self.entry.dump_section(start_new) + + # TODO: rename it to store_declaration after removal of kernel-doc.pl + def output_declaration(self, dtype, name, **args): + """ + Stores the entry into an entry array. + + The actual output and output filters will be handled elsewhere + """ + + item = KdocItem(name, self.fname, dtype, + self.entry.declaration_start_line, **args) + item.warnings = self.entry.warnings + + # Drop empty sections + # TODO: improve empty sections logic to emit warnings + sections = self.entry.sections + for section in ["Description", "Return"]: + if section in sections and not sections[section].rstrip(): + del sections[section] + item.set_sections(sections, self.entry.section_start_lines) + item.set_params(self.entry.parameterlist, self.entry.parameterdescs, + self.entry.parametertypes, + self.entry.parameterdesc_start_lines) + self.entries.append(item) + + self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) + + def reset_state(self, ln): + """ + Ancillary routine to create a new entry. It initializes all + variables used by the state machine. + """ + + # + # Flush the warnings out before we proceed further + # + if self.entry and self.entry not in self.entries: + for log_msg in self.entry.warnings: + self.config.log.warning(log_msg) + + self.entry = KernelEntry(self.config, self.fname, ln) + + # State flags + self.state = state.NORMAL + + def push_parameter(self, ln, decl_type, param, dtype, + org_arg, declaration_name): + """ + Store parameters and their descriptions at self.entry. + """ + + if self.entry.anon_struct_union and dtype == "" and param == "}": + return # Ignore the ending }; from anonymous struct/union + + self.entry.anon_struct_union = False + + param = KernRe(r'[\[\)].*').sub('', param, count=1) + + # + # Look at various "anonymous type" cases. + # + if dtype == '': + if param.endswith("..."): + if len(param) > 3: # there is a name provided, use that + param = param[:-3] + if not self.entry.parameterdescs.get(param): + self.entry.parameterdescs[param] = "variable arguments" + + elif (not param) or param == "void": + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True + + # Warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements) + if param not in self.entry.parameterdescs and not param.startswith("#"): + self.entry.parameterdescs[param] = self.undescribed + + if "." not in param: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_msg(ln, + f"{dname} '{param}' not described in '{declaration_name}'") + + # Strip spaces from param so that it is one continuous string on + # parameterlist. This fixes a problem where check_sections() + # cannot find a parameter like "addr[6 + 2]" because it actually + # appears as "addr[6", "+", "2]" on the parameter list. + # However, it's better to maintain the param string unchanged for + # output, so just weaken the string compare in check_sections() + # to ignore "[blah" in a parameter string. + + self.entry.parameterlist.append(param) + org_arg = KernRe(r'\s\s+').sub(' ', org_arg) + self.entry.parametertypes[param] = org_arg + + + def create_parameter_list(self, ln, decl_type, args, + splitter, declaration_name): + """ + Creates a list of parameters, storing them at self.entry. + """ + + # temporarily replace all commas inside function pointer definition + arg_expr = KernRe(r'(\([^\),]+),') + while arg_expr.search(args): + args = arg_expr.sub(r"\1#", args) + + for arg in args.split(splitter): + # Ignore argument attributes + arg = KernRe(r'\sPOS0?\s').sub(' ', arg) + + # Strip leading/trailing spaces + arg = arg.strip() + arg = KernRe(r'\s+').sub(' ', arg, count=1) + + if arg.startswith('#'): + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + + # Treat preprocessor directive as a typeless variable + self.push_parameter(ln, decl_type, arg, "", + "", declaration_name) + # + # The pointer-to-function case. + # + elif KernRe(r'\(.+\)\s*\(').search(arg): + arg = arg.replace('#', ',') + r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" + r'([\w\[\].]*)' # Capture the name and possible [array] + r'\s*\)') # Make sure the trailing ")" is there + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + # + # The array-of-pointers case. Dig the parameter name out from the middle + # of the declaration. + # + elif KernRe(r'\(.+\)\s*\[').search(arg): + r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" + r'([\w.]*?)' # The actual pointer name + r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + elif arg: + # + # Clean up extraneous spaces and split the string at commas; the first + # element of the resulting list will also include the type information. + # + arg = KernRe(r'\s*:\s*').sub(":", arg) + arg = KernRe(r'\s*\[').sub('[', arg) + args = KernRe(r'\s*,\s*').split(arg) + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + # + # args[0] has a string of "type a". If "a" includes an [array] + # declaration, we want to not be fooled by any white space inside + # the brackets, so detect and handle that case specially. + # + r = KernRe(r'^([^[\]]*\s+)(.*)$') + if r.match(args[0]): + args[0] = r.group(2) + dtype = r.group(1) + else: + # No space in args[0]; this seems wrong but preserves previous behavior + dtype = '' + + bitfield_re = KernRe(r'(.*?):(\w+)') + for param in args: + # + # For pointers, shift the star(s) from the variable name to the + # type declaration. + # + r = KernRe(r'^(\*+)\s*(.*)') + if r.match(param): + self.push_parameter(ln, decl_type, r.group(2), + f"{dtype} {r.group(1)}", + arg, declaration_name) + # + # Perform a similar shift for bitfields. + # + elif bitfield_re.search(param): + if dtype != "": # Skip unnamed bit-fields + self.push_parameter(ln, decl_type, bitfield_re.group(1), + f"{dtype}:{bitfield_re.group(2)}", + arg, declaration_name) + else: + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + def check_sections(self, ln, decl_name, decl_type): + """ + Check for errors inside sections, emitting warnings if not found + parameters are described. + """ + for section in self.entry.sections: + if section not in self.entry.parameterlist and \ + not known_sections.search(section): + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + self.emit_msg(ln, + f"Excess {dname} '{section}' description in '{decl_name}'") + + def check_return_section(self, ln, declaration_name, return_type): + """ + If the function doesn't return void, warns about the lack of a + return description. + """ + + if not self.config.wreturn: + return + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type (but not "void *") + if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): + return + + if not self.entry.sections.get("Return", None): + self.emit_msg(ln, + f"No description found for return value of '{declaration_name}'") + + # + # Split apart a structure prototype; returns (struct|union, name, members) or None + # + def split_struct_proto(self, proto): + type_pattern = r'(struct|union)' + qualifiers = [ + "__attribute__", + "__packed", + "__aligned", + "____cacheline_aligned_in_smp", + "____cacheline_aligned", + ] + definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" + + r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) + if r.search(proto): + return (r.group(1), r.group(2), r.group(3)) + else: + r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + if r.search(proto): + return (r.group(1), r.group(3), r.group(2)) + return None + # + # Rewrite the members of a structure or union for easier formatting later on. + # Among other things, this function will turn a member like: + # + # struct { inner_members; } foo; + # + # into: + # + # struct foo; inner_members; + # + def rewrite_struct_members(self, members): + # + # Process struct/union members from the most deeply nested outward. The + # trick is in the ^{ below - it prevents a match of an outer struct/union + # until the inner one has been munged (removing the "{" in the process). + # + struct_members = KernRe(r'(struct|union)' # 0: declaration type + r'([^\{\};]+)' # 1: possible name + r'(\{)' + r'([^\{\}]*)' # 3: Contents of declaration + r'(\})' + r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration + tuples = struct_members.findall(members) + while tuples: + for t in tuples: + newmember = "" + oldmember = "".join(t) # Reconstruct the original formatting + dtype, name, lbr, content, rbr, rest, semi = t + # + # Pass through each field name, normalizing the form and formatting. + # + for s_id in rest.split(','): + s_id = s_id.strip() + newmember += f"{dtype} {s_id}; " + # + # Remove bitfield/array/pointer info, getting the bare name. + # + s_id = KernRe(r'[:\[].*').sub('', s_id) + s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + # + # Pass through the members of this inner structure/union. + # + for arg in content.split(';'): + arg = arg.strip() + # + # Look for (type)(*name)(args) - pointer to function + # + r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') + if r.match(arg): + dtype, name, extra = r.group(1), r.group(2), r.group(3) + # Pointer-to-function + if not s_id: + # Anonymous struct/union + newmember += f"{dtype}{name}{extra}; " + else: + newmember += f"{dtype}{s_id}.{name}{extra}; " + # + # Otherwise a non-function member. + # + else: + # + # Remove bitmap and array portions and spaces around commas + # + arg = KernRe(r':\s*\d+\s*').sub('', arg) + arg = KernRe(r'\[.*\]').sub('', arg) + arg = KernRe(r'\s*,\s*').sub(',', arg) + # + # Look for a normal decl - "type name[,name...]" + # + r = KernRe(r'(.*)\s+([\S+,]+)') + if r.search(arg): + for name in r.group(2).split(','): + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) + if not s_id: + # Anonymous struct/union + newmember += f"{r.group(1)} {name}; " + else: + newmember += f"{r.group(1)} {s_id}.{name}; " + else: + newmember += f"{arg}; " + # + # At the end of the s_id loop, replace the original declaration with + # the munged version. + # + members = members.replace(oldmember, newmember) + # + # End of the tuple loop - search again and see if there are outer members + # that now turn up. + # + tuples = struct_members.findall(members) + return members + + # + # Format the struct declaration into a standard form for inclusion in the + # resulting docs. + # + def format_struct_decl(self, declaration): + # + # Insert newlines, get rid of extra spaces. + # + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) + # + # Format inline enums with each member on its own line. + # + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') + while r.search(declaration): + declaration = r.sub(r'\1,\n\2', declaration) + # + # Now go through and supply the right number of tabs + # for each line. + # + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) + if clause: + if '}' in clause and level > 1: + level -= 1 + if not clause.startswith('#'): + declaration += "\t" * level + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + return declaration + + + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: + self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") + return + decl_type, declaration_name, members = struct_parts + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") + return + # + # Go through the list of members applying all of our transformations. + # + members = trim_private_members(members) + members = apply_transforms(struct_xforms, members) + + nested = NestedMatch() + for search, sub in struct_nested_prefixes: + members = nested.sub(search, sub, members) + # + # Deal with embedded struct and union members, and drop enums entirely. + # + declaration = members + members = self.rewrite_struct_members(members) + members = re.sub(r'(\{[^\{\}]*\})', '', members) + # + # Output the result and we are done. + # + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type) + self.output_declaration(decl_type, declaration_name, + definition=self.format_struct_decl(declaration), + purpose=self.entry.declaration_purpose) + + def dump_enum(self, ln, proto): + """ + Stores an enum inside self.entries array. + """ + # + # Strip preprocessor directives. Note that this depends on the + # trailing semicolon we added in process_proto_type(). + # + proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + # + # Parse out the name and members of the enum. Typedef form first. + # + r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + if r.search(proto): + declaration_name = r.group(2) + members = trim_private_members(r.group(1)) + # + # Failing that, look for a straight enum + # + else: + r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') + if r.match(proto): + declaration_name = r.group(1) + members = trim_private_members(r.group(2)) + # + # OK, this isn't going to work. + # + else: + self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") + return + # + # Make sure we found what we were expecting. + # + if self.entry.identifier != declaration_name: + if self.entry.identifier == "": + self.emit_msg(ln, + f"{proto}: wrong kernel-doc identifier on prototype") + else: + self.emit_msg(ln, + f"expecting prototype for enum {self.entry.identifier}. " + f"Prototype was for enum {declaration_name} instead") + return + + if not declaration_name: + declaration_name = "(anonymous)" + # + # Parse out the name of each enum member, and verify that we + # have a description for it. + # + member_set = set() + members = KernRe(r'\([^;)]*\)').sub('', members) + for arg in members.split(','): + if not arg: + continue + arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + self.entry.parameterlist.append(arg) + if arg not in self.entry.parameterdescs: + self.entry.parameterdescs[arg] = self.undescribed + self.emit_msg(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") + member_set.add(arg) + # + # Ensure that every described member actually exists in the enum. + # + for k in self.entry.parameterdescs: + if k not in member_set: + self.emit_msg(ln, + f"Excess enum value '%{k}' description in '{declaration_name}'") + + self.output_declaration('enum', declaration_name, + purpose=self.entry.declaration_purpose) + + def dump_declaration(self, ln, prototype): + """ + Stores a data declaration inside self.entries array. + """ + + if self.entry.decl_type == "enum": + self.dump_enum(ln, prototype) + elif self.entry.decl_type == "typedef": + self.dump_typedef(ln, prototype) + elif self.entry.decl_type in ["union", "struct"]: + self.dump_struct(ln, prototype) + else: + # This would be a bug + self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') + + def dump_function(self, ln, prototype): + """ + Stores a function of function macro inside self.entries array. + """ + + found = func_macro = False + return_type = '' + decl_type = 'function' + # + # Apply the initial transformations. + # + prototype = apply_transforms(function_xforms, prototype) + # + # If we have a macro, remove the "#define" at the front. + # + new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) + if new_proto != prototype: + prototype = new_proto + # + # Dispense with the simple "#define A B" case here; the key + # is the space after the name of the symbol being defined. + # NOTE that the seemingly misnamed "func_macro" indicates a + # macro *without* arguments. + # + r = KernRe(r'^(\w+)\s+') + if r.search(prototype): + return_type = '' + declaration_name = r.group(1) + func_macro = True + found = True + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. + # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + name = r'\w+' + type1 = r'(?:[\w\s]+)?' + type2 = r'(?:[\w\s]+\*+)+' + # + # Attempt to match first on (args) with no internal parentheses; this + # lets us easily filter out __acquires() and other post-args stuff. If + # that fails, just grab the rest of the line to the last closing + # parenthesis. + # + proto_args = r'\(([^\(]*|.*)\)' + # + # (Except for the simple macro case) attempt to split up the prototype + # in the various ways we understand. + # + if not found: + patterns = [ + rf'^()({name})\s*{proto_args}', + rf'^({type1})\s+({name})\s*{proto_args}', + rf'^({type2})\s*({name})\s*{proto_args}', + ] + + for p in patterns: + r = KernRe(p) + if r.match(prototype): + return_type = r.group(1) + declaration_name = r.group(2) + args = r.group(3) + self.create_parameter_list(ln, decl_type, args, ',', + declaration_name) + found = True + break + # + # Parsing done; make sure that things are as we expect. + # + if not found: + self.emit_msg(ln, + f"cannot understand function prototype: '{prototype}'") + return + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " + f"Prototype was for {declaration_name}() instead") + return + self.check_sections(ln, declaration_name, "function") + self.check_return_section(ln, declaration_name, return_type) + # + # Store the result. + # + self.output_declaration(decl_type, declaration_name, + typedef=('typedef' in return_type), + functiontype=return_type, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + + + def dump_typedef(self, ln, proto): + """ + Stores a typedef inside self.entries array. + """ + # + # We start by looking for function typedefs. + # + typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + typedef_ident = r'\*?\s*(\w\S+)\s*' + typedef_args = r'\s*\((.*)\);' + + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) + + # Parse function typedef prototypes + for r in [typedef1, typedef2]: + if not r.match(proto): + continue + + return_type = r.group(1).strip() + declaration_name = r.group(2) + args = r.group(3) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.create_parameter_list(ln, 'function', args, ',', declaration_name) + + self.output_declaration('function', declaration_name, + typedef=True, + functiontype=return_type, + purpose=self.entry.declaration_purpose) + return + # + # Not a function, try to parse a simple typedef. + # + r = KernRe(r'typedef.*\s+(\w+)\s*;') + if r.match(proto): + declaration_name = r.group(1) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.output_declaration('typedef', declaration_name, + purpose=self.entry.declaration_purpose) + return + + self.emit_msg(ln, "error: Cannot parse typedef!") + + @staticmethod + def process_export(function_set, line): + """ + process EXPORT_SYMBOL* tags + + This method doesn't use any variable from the class, so declare it + with a staticmethod decorator. + """ + + # We support documenting some exported symbols with different + # names. A horrible hack. + suffixes = [ '_noprof' ] + + # Note: it accepts only one EXPORT_SYMBOL* per line, as having + # multiple export lines would violate Kernel coding style. + + if export_symbol.search(line): + symbol = export_symbol.group(2) + elif export_symbol_ns.search(line): + symbol = export_symbol_ns.group(2) + else: + return False + # + # Found an export, trim out any special suffixes + # + for suffix in suffixes: + # Be backward compatible with Python < 3.9 + if symbol.endswith(suffix): + symbol = symbol[:-len(suffix)] + function_set.add(symbol) + return True + + def process_normal(self, ln, line): + """ + STATE_NORMAL: looking for the /** to begin everything. + """ + + if not doc_start.match(line): + return + + # start a new entry + self.reset_state(ln) + + # next line is always the function name + self.state = state.NAME + + def process_name(self, ln, line): + """ + STATE_NAME: Looking for the "name - description" line + """ + # + # Check for a DOC: block and handle them specially. + # + if doc_block.search(line): + + if not doc_block.group(1): + self.entry.begin_section(ln, "Introduction") + else: + self.entry.begin_section(ln, doc_block.group(1)) + + self.entry.identifier = self.entry.section + self.state = state.DOCBLOCK + # + # Otherwise we're looking for a normal kerneldoc declaration line. + # + elif doc_decl.search(line): + self.entry.identifier = doc_decl.group(1) + + # Test for data declaration + if doc_begin_data.search(line): + self.entry.decl_type = doc_begin_data.group(1) + self.entry.identifier = doc_begin_data.group(2) + # + # Look for a function description + # + elif doc_begin_func.search(line): + self.entry.identifier = doc_begin_func.group(1) + self.entry.decl_type = "function" + # + # We struck out. + # + else: + self.emit_msg(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = state.NORMAL + return + # + # OK, set up for a new kerneldoc entry. + # + self.state = state.BODY + self.entry.identifier = self.entry.identifier.strip(" ") + # if there's no @param blocks need to set up default section here + self.entry.begin_section(ln + 1) + # + # Find the description portion, which *should* be there but + # isn't always. + # (We should be able to capture this from the previous parsing - someday) + # + r = KernRe("[-:](.*)") + if r.search(line): + self.entry.declaration_purpose = trim_whitespace(r.group(1)) + self.state = state.DECLARATION + else: + self.entry.declaration_purpose = "" + + if not self.entry.declaration_purpose and self.config.wshort_desc: + self.emit_msg(ln, + f"missing initial short description on line:\n{line}") + + if not self.entry.identifier and self.entry.decl_type != "enum": + self.emit_msg(ln, + f"wrong kernel-doc identifier on line:\n{line}") + self.state = state.NORMAL + + if self.config.verbose: + self.emit_msg(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + warning=False) + # + # Failed to find an identifier. Emit a warning + # + else: + self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") + + # + # Helper function to determine if a new section is being started. + # + def is_new_section(self, ln, line): + if doc_sect.search(line): + self.state = state.BODY + # + # Pick out the name of our new section, tweaking it if need be. + # + newsection = doc_sect.group(1) + if newsection.lower() == 'description': + newsection = 'Description' + elif newsection.lower() == 'context': + newsection = 'Context' + self.state = state.SPECIAL_SECTION + elif newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + self.state = state.SPECIAL_SECTION + elif newsection[0] == '@': + self.state = state.SPECIAL_SECTION + # + # Initialize the contents, and get the new section going. + # + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + self.dump_section() + self.entry.begin_section(ln, newsection) + self.entry.leading_space = None + + self.entry.add_text(newcontents.lstrip()) + return True + return False + + # + # Helper function to detect (and effect) the end of a kerneldoc comment. + # + def is_comment_end(self, ln, line): + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + + doc_end: + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') + if r.match(line): + self.emit_msg(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = state.PROTO + return True + return False + + + def process_decl(self, ln, line): + """ + STATE_DECLARATION: We've seen the beginning of a declaration + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # Look for anything with the " * " line beginning. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # A blank line means that we have moved out of the declaration + # part of the comment (without any "special section" parameter + # descriptions). + # + if cont == "": + self.state = state.BODY + # + # Otherwise we have more of the declaration section to soak up. + # + else: + self.entry.declaration_purpose = \ + trim_whitespace(self.entry.declaration_purpose + ' ' + cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + + def process_special(self, ln, line): + """ + STATE_SPECIAL_SECTION: a section ending with a blank line + """ + # + # If we have hit a blank line (only the " * " marker), then this + # section is done. + # + if KernRe(r"\s*\*\s*$").match(line): + self.entry.begin_section(ln, dump = True) + self.state = state.BODY + return + # + # Not a blank line, look for the other ways to end the section. + # + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # OK, we should have a continuation of the text for this section. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # If the lines of text after the first in a special section have + # leading white space, we need to trim it out or Sphinx will get + # confused. For the second line (the None case), see what we + # find there and remember it. + # + if self.entry.leading_space is None: + r = KernRe(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + # + # Otherwise, before trimming any leading chars, be *sure* + # that they are white space. We should maybe warn if this + # isn't the case. + # + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + self.entry.leading_space = i + break + # + # Add the trimmed result to the section and we're done. + # + self.entry.add_text(cont[self.entry.leading_space:]) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_body(self, ln, line): + """ + STATE_BODY: the bulk of a kerneldoc comment. + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + + if doc_content.search(line): + cont = doc_content.group(1) + self.entry.add_text(cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_inline_name(self, ln, line): + """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" + + if doc_inline_sect.search(line): + self.entry.begin_section(ln, doc_inline_sect.group(1)) + self.entry.add_text(doc_inline_sect.group(2).lstrip()) + self.state = state.INLINE_TEXT + elif doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") + self.state = state.PROTO + # else ... ?? + + def process_inline_text(self, ln, line): + """STATE_INLINE_TEXT: docbook comments within a prototype.""" + + if doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.entry.add_text(doc_content.group(1)) + # else ... ?? + + def syscall_munge(self, ln, proto): # pylint: disable=W0613 + """ + Handle syscall definitions + """ + + is_void = False + + # Strip newlines/CR's + proto = re.sub(r'[\r\n]+', ' ', proto) + + # Check if it's a SYSCALL_DEFINE0 + if 'SYSCALL_DEFINE0' in proto: + is_void = True + + # Replace SYSCALL_DEFINE with correct return type & function name + proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + + r = KernRe(r'long\s+(sys_.*?),') + if r.search(proto): + proto = KernRe(',').sub('(', proto, count=1) + elif is_void: + proto = KernRe(r'\)').sub('(void)', proto, count=1) + + # Now delete all of the odd-numbered commas in the proto + # so that argument types & names don't have a comma between them + count = 0 + length = len(proto) + + if is_void: + length = 0 # skip the loop if is_void + + for ix in range(length): + if proto[ix] == ',': + count += 1 + if count % 2 == 1: + proto = proto[:ix] + ' ' + proto[ix + 1:] + + return proto + + def tracepoint_munge(self, ln, proto): + """ + Handle tracepoint definitions + """ + + tracepointname = None + tracepointargs = None + + # Match tracepoint name based on different patterns + r = KernRe(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname = r.group(2) + + if tracepointname: + tracepointname = tracepointname.lstrip() + + r = KernRe(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs = r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_msg(ln, + f"Unrecognized tracepoint format:\n{proto}\n") + else: + proto = f"static inline void trace_{tracepointname}({tracepointargs})" + self.entry.identifier = f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype""" + + # strip C99-style comments to end of line + line = KernRe(r"//.*$", re.S).sub('', line) + # + # Soak up the line's worth of prototype text, stopping at { or ; if present. + # + if KernRe(r'\s*#\s*define').match(line): + self.entry.prototype = line + elif not line.startswith('#'): # skip other preprocessor stuff + r = KernRe(r'([^\{]*)') + if r.match(line): + self.entry.prototype += r.group(1) + " " + # + # If we now have the whole prototype, clean it up and declare victory. + # + if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): + # strip comments and surrounding spaces + self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() + # + # Handle self.entry.prototypes for function pointers like: + # int (*pcs_config)(struct foo) + # by turning it into + # int pcs_config(struct foo) + # + r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') + self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) + # + # Handle special declaration syntaxes + # + if 'SYSCALL_DEFINE' in self.entry.prototype: + self.entry.prototype = self.syscall_munge(ln, + self.entry.prototype) + else: + r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype = self.tracepoint_munge(ln, + self.entry.prototype) + # + # ... and we're done + # + self.dump_function(ln, self.entry.prototype) + self.reset_state(ln) + + def process_proto_type(self, ln, line): + """Ancillary routine to process a type""" + + # Strip C99-style comments and surrounding whitespace + line = KernRe(r"//.*$", re.S).sub('', line).strip() + if not line: + return # nothing to see here + + # To distinguish preprocessor directive from regular declaration later. + if line.startswith('#'): + line += ";" + # + # Split the declaration on any of { } or ;, and accumulate pieces + # until we hit a semicolon while not inside {brackets} + # + r = KernRe(r'(.*?)([{};])') + for chunk in r.split(line): + if chunk: # Ignore empty matches + self.entry.prototype += chunk + # + # This cries out for a match statement ... someday after we can + # drop Python 3.9 ... + # + if chunk == '{': + self.entry.brcount += 1 + elif chunk == '}': + self.entry.brcount -= 1 + elif chunk == ';' and self.entry.brcount <= 0: + self.dump_declaration(ln, self.entry.prototype) + self.reset_state(ln) + return + # + # We hit the end of the line while still in the declaration; put + # in a space to represent the newline. + # + self.entry.prototype += ' ' + + def process_proto(self, ln, line): + """STATE_PROTO: reading a function/whatever prototype.""" + + if doc_inline_oneline.search(line): + self.entry.begin_section(ln, doc_inline_oneline.group(1)) + self.entry.add_text(doc_inline_oneline.group(2)) + self.dump_section() + + elif doc_inline_start.search(line): + self.state = state.INLINE_NAME + + elif self.entry.decl_type == 'function': + self.process_proto_function(ln, line) + + else: + self.process_proto_type(ln, line) + + def process_docblock(self, ln, line): + """STATE_DOCBLOCK: within a DOC: block.""" + + if doc_end.search(line): + self.dump_section() + self.output_declaration("doc", self.entry.identifier) + self.reset_state(ln) + + elif doc_content.search(line): + self.entry.add_text(doc_content.group(1)) + + def parse_export(self): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + + for line in fp: + self.process_export(export_table, line) + + except IOError: + return None + + return export_table + + # + # The state/action table telling us which function to invoke in + # each state. + # + state_actions = { + state.NORMAL: process_normal, + state.NAME: process_name, + state.BODY: process_body, + state.DECLARATION: process_decl, + state.SPECIAL_SECTION: process_special, + state.INLINE_NAME: process_inline_name, + state.INLINE_TEXT: process_inline_text, + state.PROTO: process_proto, + state.DOCBLOCK: process_docblock, + } + + def parse_kdoc(self): + """ + Open and process each line of a C source file. + The parsing is controlled via a state machine, and the line is passed + to a different process function depending on the state. The process + function may update the state as needed. + + Besides parsing kernel-doc tags, it also parses export symbols. + """ + + prev = "" + prev_ln = None + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + for ln, line in enumerate(fp): + + line = line.expandtabs().strip("\n") + + # Group continuation lines on prototypes + if self.state == state.PROTO: + if line.endswith("\\"): + prev += line.rstrip("\\") + if not prev_ln: + prev_ln = ln + continue + + if prev: + ln = prev_ln + line = prev + line + prev = "" + prev_ln = None + + self.config.log.debug("%d %s: %s", + ln, state.name[self.state], + line) + + # This is an optimization over the original script. + # There, when export_file was used for the same file, + # it was read twice. Here, we use the already-existing + # loop to parse exported symbols as well. + # + if (self.state != state.NORMAL) or \ + not self.process_export(export_table, line): + # Hand this line to the appropriate state handler + self.state_actions[self.state](self, ln, line) + + except OSError: + self.config.log.error(f"Error: Cannot open file {self.fname}") + + return export_table, self.entries diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py new file mode 100644 index 000000000000..612223e1e723 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . + +""" +Regular expression ancillary classes. + +Those help caching regular expressions and do matching for kernel-doc. +""" + +import re + +# Local cache for regular expressions +re_cache = {} + + +class KernRe: + """ + Helper class to simplify regex declaration and usage, + + It calls re.compile for a given pattern. It also allows adding + regular expressions and define sub at class init time. + + Regular expressions can be cached via an argument, helping to speedup + searches. + """ + + def _add_regex(self, string, flags): + """ + Adds a new regex or re-use it from the cache. + """ + self.regex = re_cache.get(string, None) + if not self.regex: + self.regex = re.compile(string, flags=flags) + if self.cache: + re_cache[string] = self.regex + + def __init__(self, string, cache=True, flags=0): + """ + Compile a regular expression and initialize internal vars. + """ + + self.cache = cache + self.last_match = None + + self._add_regex(string, flags) + + def __str__(self): + """ + Return the regular expression pattern. + """ + return self.regex.pattern + + def __add__(self, other): + """ + Allows adding two regular expressions into one. + """ + + return KernRe(str(self) + str(other), cache=self.cache or other.cache, + flags=self.regex.flags | other.regex.flags) + + def match(self, string): + """ + Handles a re.match storing its results + """ + + self.last_match = self.regex.match(string) + return self.last_match + + def search(self, string): + """ + Handles a re.search storing its results + """ + + self.last_match = self.regex.search(string) + return self.last_match + + def findall(self, string): + """ + Alias to re.findall + """ + + return self.regex.findall(string) + + def split(self, string): + """ + Alias to re.split + """ + + return self.regex.split(string) + + def sub(self, sub, string, count=0): + """ + Alias to re.sub + """ + + return self.regex.sub(sub, string, count=count) + + def group(self, num): + """ + Returns the group results of the last match + """ + + return self.last_match.group(num) + + +class NestedMatch: + """ + Finding nested delimiters is hard with regular expressions. It is + even harder on Python with its normal re module, as there are several + advanced regular expressions that are missing. + + This is the case of this pattern: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parenthesis of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + # TODO: make NestedMatch handle multiple match groups + # + # Right now, regular expressions to match it are defined only up to + # the start delimiter, e.g.: + # + # \bSTRUCT_GROUP\( + # + # is similar to: STRUCT_GROUP\((.*)\) + # except that the content inside the match group is delimiter's aligned. + # + # The content inside parenthesis are converted into a single replace + # group (e.g. r`\1'). + # + # It would be nice to change such definition to support multiple + # match groups, allowing a regex equivalent to. + # + # FOO\((.*), (.*), (.*)\) + # + # it is probably easier to define it not as a regular expression, but + # with some lexical definition like: + # + # FOO(arg1, arg2, arg3) + + DELIMITER_PAIRS = { + '{': '}', + '(': ')', + '[': ']', + } + + RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') + + def _search(self, regex, line): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and place them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. + + The algorithm shoud work fine for properly paired lines, but will + silently ignore end delimiters that preceeds an start delimiter. + This should be OK for kernel-doc parser, as unaligned delimiters + would cause compilation errors. So, we don't need to rise exceptions + to cover such issues. + """ + + stack = [] + + for match_re in regex.finditer(line): + start = match_re.start() + offset = match_re.end() + + d = line[offset - 1] + if d not in self.DELIMITER_PAIRS: + continue + + end = self.DELIMITER_PAIRS[d] + stack.append(end) + + for match in self.RE_DELIM.finditer(line[offset:]): + pos = match.start() + offset + + d = line[pos] + + if d in self.DELIMITER_PAIRS: + end = self.DELIMITER_PAIRS[d] + + stack.append(end) + continue + + # Does the end delimiter match what it is expected? + if stack and d == stack[-1]: + stack.pop() + + if not stack: + yield start, offset, pos + 1 + break + + def search(self, regex, line): + """ + This is similar to re.search: + + It matches a regex that it is followed by a delimiter, + returning occurrences only if all delimiters are paired. + """ + + for t in self._search(regex, line): + + yield line[t[0]:t[2]] + + def sub(self, regex, sub, line, count=0): + """ + This is similar to re.sub: + + It matches a regex that it is followed by a delimiter, + replacing occurrences only if all delimiters are paired. + + if r'\1' is used, it works just like re: it places there the + matched paired data with the delimiter stripped. + + If count is different than zero, it will replace at most count + items. + """ + out = "" + + cur_pos = 0 + n = 0 + + for start, end, pos in self._search(regex, line): + out += line[cur_pos:start] + + # Value, ignoring start/end delimiters + value = line[end:pos - 1] + + # replaces \1 at the sub string, if \1 is used there + new_sub = sub + new_sub = new_sub.replace(r'\1', value) + + out += new_sub + + # Drop end ';' if any + if line[pos] == ';': + pos += 1 + + cur_pos = pos + n += 1 + + if count and count >= n: + break + + # Append the remaining string + l = len(line) + out += line[cur_pos:l] + + return out diff --git a/tools/lib/python/kdoc/latex_fonts.py b/tools/lib/python/kdoc/latex_fonts.py new file mode 100755 index 000000000000..29317f8006ea --- /dev/null +++ b/tools/lib/python/kdoc/latex_fonts.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) Akira Yokosawa, 2024 +# +# Ported to Python by (c) Mauro Carvalho Chehab, 2025 + +""" +Detect problematic Noto CJK variable fonts. + +For "make pdfdocs", reports of build errors of translations.pdf started +arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE +tumbleweed have started deploying variable-font [3] format of "Noto CJK" +fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK +(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which +does not (and likely never will) understand variable fonts for historical +reasons. + +The build error happens even when both of variable- and non-variable-format +fonts are found on the build system. To make matters worse, Fedora enlists +variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, +-zh_TW, etc. Hence developers who have interest in CJK pages are more +likely to encounter the build errors. + +This script is invoked from the error path of "make pdfdocs" and emits +suggestions if variable-font files of "Noto CJK" fonts are in the list of +fonts accessible from XeTeX. + +References: +[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ +[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ +[3]: https://en.wikipedia.org/wiki/Variable_font +[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts +[5]: https://build.opensuse.org/request/show/1157217 + +#=========================================================================== +Workarounds for building translations.pdf +#=========================================================================== + +* Denylist "variable font" Noto CJK fonts. + - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with + tweaks if necessary. Remove leading "". + - Path of fontconfig/fonts.conf can be overridden by setting an env + variable FONTS_CONF_DENY_VF. + + * Template: +----------------------------------------------------------------- + + + + + + + + /usr/share/fonts/google-noto-*-cjk-vf-fonts + + /usr/share/fonts/truetype/Noto*CJK*-VF.otf + + + +----------------------------------------------------------------- + + The denylisting is activated for "make pdfdocs". + +* For skipping CJK pages in PDF + - Uninstall texlive-xecjk. + Denylisting is not needed in this case. + +* For printing CJK pages in PDF + - Need non-variable "Noto CJK" fonts. + * Fedora + - google-noto-sans-cjk-fonts + - google-noto-serif-cjk-fonts + * openSUSE tumbleweed + - Non-variable "Noto CJK" fonts are not available as distro packages + as of April, 2024. Fetch a set of font files from upstream Noto + CJK Font released at: + https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc + and at: + https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc + , then uncompress and deploy them. + - Remember to update fontconfig cache by running fc-cache. + +!!! Caution !!! + Uninstalling "variable font" packages can be dangerous. + They might be depended upon by other packages important for your work. + Denylisting should be less invasive, as it is effective only while + XeLaTeX runs in "make pdfdocs". +""" + +import os +import re +import subprocess +import textwrap +import sys + +class LatexFontChecker: + """ + Detect problems with CJK variable fonts that affect PDF builds for + translations. + """ + + def __init__(self, deny_vf=None): + if not deny_vf: + deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") + + self.environ = os.environ.copy() + self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) + + self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") + + def description(self): + return __doc__ + + def get_noto_cjk_vf_fonts(self): + """Get Noto CJK fonts""" + + cjk_fonts = set() + cmd = ["fc-list", ":", "file", "family", "variable"] + try: + result = subprocess.run(cmd,stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + env=self.environ, + check=True) + + except subprocess.CalledProcessError as exc: + sys.exit(f"Error running fc-list: {repr(exc)}") + + for line in result.stdout.splitlines(): + if 'variable=True' not in line: + continue + + match = self.re_cjk.search(line) + if match: + cjk_fonts.add(match.group(1)) + + return sorted(cjk_fonts) + + def check(self): + """Check for problems with CJK fonts""" + + fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") + if not fonts: + return None + + rel_file = os.path.relpath(__file__, os.getcwd()) + + msg = "=" * 77 + "\n" + msg += 'XeTeX is confused by "variable font" files listed below:\n' + msg += fonts + "\n" + msg += textwrap.dedent(f""" + For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. + Or, CJK pages can be skipped by uninstalling texlive-xecjk. + + For more info on denylisting, other options, and variable font, run: + + tools/docs/check-variable-fonts.py -h + """) + msg += "=" * 77 + + return msg diff --git a/tools/lib/python/kdoc/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py new file mode 100755 index 000000000000..25361996cd20 --- /dev/null +++ b/tools/lib/python/kdoc/parse_data_structs.py @@ -0,0 +1,482 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2016-2025 by Mauro Carvalho Chehab . +# pylint: disable=R0912,R0915 + +""" +Parse a source file or header, creating ReStructured Text cross references. + +It accepts an optional file to change the default symbol reference or to +suppress symbols from the output. + +It is capable of identifying defines, functions, structs, typedefs, +enums and enum symbols and create cross-references for all of them. +It is also capable of distinguish #define used for specifying a Linux +ioctl. + +The optional rules file contains a set of rules like: + + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` +""" + +import os +import re +import sys + + +class ParseDataStructs: + """ + Creates an enriched version of a Kernel header file with cross-links + to each C data structure type. + + It is meant to allow having a more comprehensive documentation, where + uAPI headers will create cross-reference links to the code. + + It is capable of identifying defines, functions, structs, typedefs, + enums and enum symbols and create cross-references for all of them. + It is also capable of distinguish #define used for specifying a Linux + ioctl. + + By default, it create rules for all symbols and defines, but it also + allows parsing an exception file. Such file contains a set of rules + using the syntax below: + + 1. Ignore rules: + + ignore ` + + Removes the symbol from reference generation. + + 2. Replace rules: + + replace + + Replaces how old_symbol with a new reference. The new_reference can be: + + - A simple symbol name; + - A full Sphinx reference. + + 3. Namespace rules + + namespace + + Sets C namespace to be used during cross-reference generation. Can + be overridden by replace rules. + + On ignore and replace rules, can be: + - ioctl: for defines that end with _IO*, e.g. ioctl definitions + - define: for other defines + - symbol: for symbols defined within enums; + - typedef: for typedefs; + - enum: for the name of a non-anonymous enum; + - struct: for structs. + + Examples: + + ignore define __LINUX_MEDIA_H + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` + + namespace MC + """ + + # Parser regexes with multiple ways to capture enums and structs + RE_ENUMS = [ + re.compile(r"^\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*enum\s+([\w_]+)\s*$"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"), + ] + RE_STRUCTS = [ + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"), + ] + + # FIXME: the original code was written a long time before Sphinx C + # domain to have multiple namespaces. To avoid to much turn at the + # existing hyperlinks, the code kept using "c:type" instead of the + # right types. To change that, we need to change the types not only + # here, but also at the uAPI media documentation. + DEF_SYMBOL_TYPES = { + "ioctl": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + "description": "IOCTL Commands", + }, + "define": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + "description": "Macros and Definitions", + }, + # We're calling each definition inside an enum as "symbol" + "symbol": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + "description": "Enumeration values", + }, + "typedef": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + "description": "Type Definitions", + }, + # This is the description of the enum itself + "enum": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + "description": "Enumerations", + }, + "struct": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + "description": "Structures", + }, + } + + def __init__(self, debug: bool = False): + """Initialize internal vars""" + self.debug = debug + self.data = "" + + self.symbols = {} + + self.namespace = None + self.ignore = [] + self.replace = [] + + for symbol_type in self.DEF_SYMBOL_TYPES: + self.symbols[symbol_type] = {} + + def read_exceptions(self, fname: str): + if not fname: + return + + name = os.path.basename(fname) + + with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f: + for ln, line in enumerate(f): + ln += 1 + line = line.strip() + if not line or line.startswith("#"): + continue + + # ignore rules + match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line) + + if match: + self.ignore.append((ln, match.group(1), match.group(2))) + continue + + # replace rules + match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line) + if match: + self.replace.append((ln, match.group(1), match.group(2), + match.group(3))) + continue + + match = re.match(r"^namespace\s+(\S+)", line) + if match: + self.namespace = match.group(1) + continue + + sys.exit(f"{name}:{ln}: invalid line: {line}") + + def apply_exceptions(self): + """ + Process exceptions file with rules to ignore or replace references. + """ + + # Handle ignore rules + for ln, c_type, symbol in self.ignore: + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + d = self.symbols[c_type] + if symbol in d: + del d[symbol] + + # Handle replace rules + for ln, c_type, old, new in self.replace: + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + reftype = None + + # Parse reference type when the type is specified + + match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new) + if match: + reftype = f":c:{match.group(1)}" + new = match.group(2) + else: + match = re.search(r"(\:ref)\:\`(.+)\`", new) + if match: + reftype = match.group(1) + new = match.group(2) + + # If the replacement rule doesn't have a type, get default + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type") + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type") + + new_ref = f"{reftype}:`{old} <{new}>`" + + # Change self.symbols to use the replacement rule + if old in self.symbols[c_type]: + (_, ln) = self.symbols[c_type][old] + self.symbols[c_type][old] = (new_ref, ln) + else: + print(f"{name}:{ln}: Warning: can't find {old} {c_type}") + + def store_type(self, ln, symbol_type: str, symbol: str, + ref_name: str = None, replace_underscores: bool = True): + """ + Stores a new symbol at self.symbols under symbol_type. + + By default, underscores are replaced by "-" + """ + defs = self.DEF_SYMBOL_TYPES[symbol_type] + + prefix = defs.get("prefix", "") + suffix = defs.get("suffix", "") + ref_type = defs.get("ref_type") + + # Determine ref_link based on symbol type + if ref_type or self.namespace: + if not ref_name: + ref_name = symbol.lower() + + # c-type references don't support hash + if ref_type == ":ref" and replace_underscores: + ref_name = ref_name.replace("_", "-") + + # C domain references may have namespaces + if ref_type.startswith(":c:"): + if self.namespace: + ref_name = f"{self.namespace}.{ref_name}" + + if ref_type: + ref_link = f"{ref_type}:`{symbol} <{ref_name}>`" + else: + ref_link = f"`{symbol} <{ref_name}>`" + else: + ref_link = symbol + + self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln) + + def store_line(self, line): + """Stores a line at self.data, properly indented""" + line = " " + line.expandtabs() + self.data += line.rstrip(" ") + + def parse_file(self, file_in: str, exceptions: str = None): + """Reads a C source file and get identifiers""" + self.data = "" + is_enum = False + is_comment = False + multiline = "" + + self.read_exceptions(exceptions) + + with open(file_in, "r", + encoding="utf-8", errors="backslashreplace") as f: + for line_no, line in enumerate(f): + self.store_line(line) + line = line.strip("\n") + + # Handle continuation lines + if line.endswith(r"\\"): + multiline += line[-1] + continue + + if multiline: + line = multiline + line + multiline = "" + + # Handle comments. They can be multilined + if not is_comment: + if re.search(r"/\*.*", line): + is_comment = True + else: + # Strip C99-style comments + line = re.sub(r"(//.*)", "", line) + + if is_comment: + if re.search(r".*\*/", line): + is_comment = False + else: + multiline = line + continue + + # At this point, line variable may be a multilined statement, + # if lines end with \ or if they have multi-line comments + # With that, it can safely remove the entire comments, + # and there's no need to use re.DOTALL for the logic below + + line = re.sub(r"(/\*.*\*/)", "", line) + if not line.strip(): + continue + + # It can be useful for debug purposes to print the file after + # having comments stripped and multi-lines grouped. + if self.debug > 1: + print(f"line {line_no + 1}: {line}") + + # Now the fun begins: parse each type and store it. + + # We opted for a two parsing logic here due to: + # 1. it makes easier to debug issues not-parsed symbols; + # 2. we want symbol replacement at the entire content, not + # just when the symbol is detected. + + if is_enum: + match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line) + if match: + self.store_type(line_no, "symbol", match.group(1)) + if "}" in line: + is_enum = False + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line) + if match: + self.store_type(line_no, "ioctl", match.group(1), + replace_underscores=False) + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line) + if match: + self.store_type(line_no, "define", match.group(1)) + continue + + match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);", + line) + if match: + name = match.group(2).strip() + symbol = match.group(3) + self.store_type(line_no, "typedef", symbol, ref_name=name) + continue + + for re_enum in self.RE_ENUMS: + match = re_enum.match(line) + if match: + self.store_type(line_no, "enum", match.group(1)) + is_enum = True + break + + for re_struct in self.RE_STRUCTS: + match = re_struct.match(line) + if match: + self.store_type(line_no, "struct", match.group(1)) + break + + self.apply_exceptions() + + def debug_print(self): + """ + Print debug information containing the replacement rules per symbol. + To make easier to check, group them per type. + """ + if not self.debug: + return + + for c_type, refs in self.symbols.items(): + if not refs: # Skip empty dictionaries + continue + + print(f"{c_type}:") + + for symbol, (ref, ln) in sorted(refs.items()): + print(f" #{ln:<5d} {symbol} -> {ref}") + + print() + + def gen_output(self): + """Write the formatted output to a file.""" + + # Avoid extra blank lines + text = re.sub(r"\s+$", "", self.data) + "\n" + text = re.sub(r"\n\s+\n", "\n\n", text) + + # Escape Sphinx special characters + text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text) + + # Source uAPI files may have special notes. Use bold font for them + text = re.sub(r"DEPRECATED", "**DEPRECATED**", text) + + # Delimiters to catch the entire symbol after escaped + start_delim = r"([ \n\t\(=\*\@])" + end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)" + + # Process all reference types + for ref_dict in self.symbols.values(): + for symbol, (replacement, _) in ref_dict.items(): + symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol)) + text = re.sub(fr'{start_delim}{symbol}{end_delim}', + fr'\1{replacement}\2', text) + + # Remove "\ " where not needed: before spaces and at the end of lines + text = re.sub(r"\\ ([\n ])", r"\1", text) + text = re.sub(r" \\ ", " ", text) + + return text + + def gen_toc(self): + """ + Create a list of symbols to be part of a TOC contents table + """ + text = [] + + # Sort symbol types per description + symbol_descriptions = [] + for k, v in self.DEF_SYMBOL_TYPES.items(): + symbol_descriptions.append((v['description'], k)) + + symbol_descriptions.sort() + + # Process each category + for description, c_type in symbol_descriptions: + + refs = self.symbols[c_type] + if not refs: # Skip empty categories + continue + + text.append(f"{description}") + text.append("-" * len(description)) + text.append("") + + # Sort symbols alphabetically + for symbol, (ref, ln) in sorted(refs.items()): + text.append(f"- LINENO_{ln}: {ref}") + + text.append("") # Add empty line between categories + + return "\n".join(text) + + def write_output(self, file_in: str, file_out: str, toc: bool): + title = os.path.basename(file_in) + + if toc: + text = self.gen_toc() + else: + text = self.gen_output() + + with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f: + f.write(".. -*- coding: utf-8; mode: rst -*-\n\n") + f.write(f"{title}\n") + f.write("=" * len(title) + "\n\n") + + if not toc: + f.write(".. parsed-literal::\n\n") + + f.write(text) diff --git a/tools/lib/python/kdoc/python_version.py b/tools/lib/python/kdoc/python_version.py new file mode 100644 index 000000000000..4fde1b882164 --- /dev/null +++ b/tools/lib/python/kdoc/python_version.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (c) 2017-2025 Mauro Carvalho Chehab + +""" +Handle Python version check logic. + +Not all Python versions are supported by scripts. Yet, on some cases, +like during documentation build, a newer version of python could be +available. + +This class allows checking if the minimal requirements are followed. + +Better than that, PythonVersion.check_python() not only checks the minimal +requirements, but it automatically switches to a the newest available +Python version if present. + +""" + +import os +import re +import subprocess +import shlex +import sys + +from glob import glob +from textwrap import indent + +class PythonVersion: + """ + Ancillary methods that checks for missing dependencies for different + types of types, like binaries, python modules, rpm deps, etc. + """ + + def __init__(self, version): + """Ïnitialize self.version tuple from a version string""" + self.version = self.parse_version(version) + + @staticmethod + def parse_version(version): + """Convert a major.minor.patch version into a tuple""" + return tuple(int(x) for x in version.split(".")) + + @staticmethod + def ver_str(version): + """Returns a version tuple as major.minor.patch""" + return ".".join([str(x) for x in version]) + + @staticmethod + def cmd_print(cmd, max_len=80): + cmd_line = [] + + for w in cmd: + w = shlex.quote(w) + + if cmd_line: + if not max_len or len(cmd_line[-1]) + len(w) < max_len: + cmd_line[-1] += " " + w + continue + else: + cmd_line[-1] += " \\" + cmd_line.append(w) + else: + cmd_line.append(w) + + return "\n ".join(cmd_line) + + def __str__(self): + """Returns a version tuple as major.minor.patch from self.version""" + return self.ver_str(self.version) + + @staticmethod + def get_python_version(cmd): + """ + Get python version from a Python binary. As we need to detect if + are out there newer python binaries, we can't rely on sys.release here. + """ + + kwargs = {} + if sys.version_info < (3, 7): + kwargs['universal_newlines'] = True + else: + kwargs['text'] = True + + result = subprocess.run([cmd, "--version"], + stdout = subprocess.PIPE, + stderr = subprocess.PIPE, + **kwargs, check=False) + + version = result.stdout.strip() + + match = re.search(r"(\d+\.\d+\.\d+)", version) + if match: + return PythonVersion.parse_version(match.group(1)) + + print(f"Can't parse version {version}") + return (0, 0, 0) + + @staticmethod + def find_python(min_version): + """ + Detect if are out there any python 3.xy version newer than the + current one. + + Note: this routine is limited to up to 2 digits for python3. We + may need to update it one day, hopefully on a distant future. + """ + patterns = [ + "python3.[0-9][0-9]", + "python3.[0-9]", + ] + + python_cmd = [] + + # Seek for a python binary newer than min_version + for path in os.getenv("PATH", "").split(":"): + for pattern in patterns: + for cmd in glob(os.path.join(path, pattern)): + if os.path.isfile(cmd) and os.access(cmd, os.X_OK): + version = PythonVersion.get_python_version(cmd) + if version >= min_version: + python_cmd.append((version, cmd)) + + return sorted(python_cmd, reverse=True) + + @staticmethod + def check_python(min_version, show_alternatives=False, bail_out=False, + success_on_error=False): + """ + Check if the current python binary satisfies our minimal requirement + for Sphinx build. If not, re-run with a newer version if found. + """ + cur_ver = sys.version_info[:3] + if cur_ver >= min_version: + ver = PythonVersion.ver_str(cur_ver) + return + + python_ver = PythonVersion.ver_str(cur_ver) + + available_versions = PythonVersion.find_python(min_version) + if not available_versions: + print(f"ERROR: Python version {python_ver} is not spported anymore\n") + print(" Can't find a new version. This script may fail") + return + + script_path = os.path.abspath(sys.argv[0]) + + # Check possible alternatives + if available_versions: + new_python_cmd = available_versions[0][1] + else: + new_python_cmd = None + + if show_alternatives and available_versions: + print("You could run, instead:") + for _, cmd in available_versions: + args = [cmd, script_path] + sys.argv[1:] + + cmd_str = indent(PythonVersion.cmd_print(args), " ") + print(f"{cmd_str}\n") + + if bail_out: + msg = f"Python {python_ver} not supported. Bailing out" + if success_on_error: + print(msg, file=sys.stderr) + sys.exit(0) + else: + sys.exit(msg) + + print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") + + # Restart script using the newer version + args = [new_python_cmd, script_path] + sys.argv[1:] + + try: + os.execv(new_python_cmd, args) + except OSError as e: + sys.exit(f"Failed to restart with {new_python_cmd}: {e}") -- cgit v1.2.3 From 992a9df41ad7173588bf90e15b33d45db2811aea Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 10 Nov 2025 15:04:30 -0700 Subject: docs: bring some order to our Python module hierarchy Now that we have tools/lib/python for our Python modules, turn them into proper packages with a single namespace so that everything can just use tools/lib/python in sys.path. No functional change. Signed-off-by: Jonathan Corbet Message-ID: <20251110220430.726665-3-corbet@lwn.net> --- Documentation/sphinx/kernel_abi.py | 4 ++-- Documentation/sphinx/kernel_include.py | 4 ++-- Documentation/sphinx/kerneldoc.py | 6 +++--- scripts/kernel-doc.py | 6 +++--- tools/docs/check-variable-fonts.py | 4 ++-- tools/docs/get_abi.py | 10 +++++----- tools/docs/parse-headers.py | 6 +++--- tools/docs/sphinx-build-wrapper | 5 ++--- tools/docs/sphinx-pre-install | 4 ++-- tools/lib/python/__init__.py | 0 tools/lib/python/abi/__init__.py | 0 tools/lib/python/abi/abi_parser.py | 2 +- tools/lib/python/abi/abi_regex.py | 4 ++-- tools/lib/python/abi/system_symbols.py | 2 +- tools/lib/python/kdoc/__init__.py | 0 tools/lib/python/kdoc/kdoc_files.py | 4 ++-- tools/lib/python/kdoc/kdoc_output.py | 4 ++-- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 18 files changed, 34 insertions(+), 35 deletions(-) create mode 100644 tools/lib/python/__init__.py create mode 100644 tools/lib/python/abi/__init__.py create mode 100644 tools/lib/python/kdoc/__init__.py (limited to 'scripts/kernel-doc.py') diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index 7ec832da8444..5667f207d175 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -43,9 +43,9 @@ from sphinx.util.docutils import switch_source_input from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/lib/python/abi")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python")) -from abi_parser import AbiParser +from abi.abi_parser import AbiParser __version__ = "1.0" diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py index a12455daa6d7..626762ff6af3 100755 --- a/Documentation/sphinx/kernel_include.py +++ b/Documentation/sphinx/kernel_include.py @@ -97,9 +97,9 @@ from docutils.parsers.rst.directives.body import CodeBlock, NumberLines from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python")) -from parse_data_structs import ParseDataStructs +from kdoc.parse_data_structs import ParseDataStructs __version__ = "1.0" logger = logging.getLogger(__name__) diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index 56f382a6bdf1..d8cdf068ef35 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -42,10 +42,10 @@ from sphinx.util import logging from pprint import pformat srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python")) -from kdoc_files import KernelFiles -from kdoc_output import RestFormat +from kdoc.kdoc_files import KernelFiles +from kdoc.kdoc_output import RestFormat __version__ = '1.0' kfiles = None diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index bb24bbf73167..7a1eaf986bcd 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -111,7 +111,7 @@ import sys # Import Python modules -LIB_DIR = "../tools/lib/python/kdoc" +LIB_DIR = "../tools/lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) @@ -292,8 +292,8 @@ def main(): logger.warning("Python 3.7 or later is required for correct results") # Import kernel-doc libraries only after checking Python version - from kdoc_files import KernelFiles # pylint: disable=C0415 - from kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 + from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 + from kdoc.kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 if args.man: out_style = ManFormat(modulename=args.modulename) diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py index c48bb05dad82..958d5a745724 100755 --- a/tools/docs/check-variable-fonts.py +++ b/tools/docs/check-variable-fonts.py @@ -17,9 +17,9 @@ import sys import os.path src_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) +sys.path.insert(0, os.path.join(src_dir, '../lib/python')) -from latex_fonts import LatexFontChecker +from kdoc.latex_fonts import LatexFontChecker checker = LatexFontChecker() diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py index e0abfe12fac7..2f0b99401f26 100755 --- a/tools/docs/get_abi.py +++ b/tools/docs/get_abi.py @@ -14,15 +14,15 @@ import sys # Import Python modules -LIB_DIR = "../lib/python/abi" +LIB_DIR = "../lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) -from abi_parser import AbiParser # pylint: disable=C0413 -from abi_regex import AbiRegex # pylint: disable=C0413 -from helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413 -from system_symbols import SystemSymbols # pylint: disable=C0413 +from abi.abi_parser import AbiParser # pylint: disable=C0413 +from abi.abi_regex import AbiRegex # pylint: disable=C0413 +from abi.helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413 +from abi.system_symbols import SystemSymbols # pylint: disable=C0413 # Command line classes diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py index ed9cf2bf22de..436acea4c6ca 100755 --- a/tools/docs/parse-headers.py +++ b/tools/docs/parse-headers.py @@ -28,9 +28,9 @@ import argparse, sys import os.path src_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) -from parse_data_structs import ParseDataStructs -from enrich_formatter import EnrichFormatter +sys.path.insert(0, os.path.join(src_dir, '../lib/python')) +from kdoc.parse_data_structs import ParseDataStructs +from kdoc.enrich_formatter import EnrichFormatter def main(): """Main function""" diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index ce0b1b5292da..d4943d952e2a 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -61,10 +61,9 @@ LIB_DIR = "../lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) -sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR + '/kdoc')) # temporary -from python_version import PythonVersion -from latex_fonts import LatexFontChecker +from kdoc.python_version import PythonVersion +from kdoc.latex_fonts import LatexFontChecker from jobserver import JobserverExec # pylint: disable=C0413,C0411,E0401 # diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install index d8c9fb76948d..965c9b093a41 100755 --- a/tools/docs/sphinx-pre-install +++ b/tools/docs/sphinx-pre-install @@ -35,8 +35,8 @@ from glob import glob import os.path src_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) -from python_version import PythonVersion +sys.path.insert(0, os.path.join(src_dir, '../lib/python')) +from kdoc.python_version import PythonVersion RECOMMENDED_VERSION = PythonVersion("3.4.3").version MIN_PYTHON_VERSION = PythonVersion("3.7").version diff --git a/tools/lib/python/__init__.py b/tools/lib/python/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/lib/python/abi/__init__.py b/tools/lib/python/abi/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py index 66a738013ce1..9b8db70067ef 100644 --- a/tools/lib/python/abi/abi_parser.py +++ b/tools/lib/python/abi/abi_parser.py @@ -17,7 +17,7 @@ from random import randrange, seed # Import Python modules -from helpers import AbiDebug, ABI_DIR +from abi.helpers import AbiDebug, ABI_DIR class AbiParser: diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py index 8a57846cbc69..d5553206de3c 100644 --- a/tools/lib/python/abi/abi_regex.py +++ b/tools/lib/python/abi/abi_regex.py @@ -12,8 +12,8 @@ import sys from pprint import pformat -from abi_parser import AbiParser -from helpers import AbiDebug +from abi.abi_parser import AbiParser +from abi.helpers import AbiDebug class AbiRegex(AbiParser): """Extends AbiParser to search ABI nodes with regular expressions""" diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py index f15c94a6e33c..4a2554da217b 100644 --- a/tools/lib/python/abi/system_symbols.py +++ b/tools/lib/python/abi/system_symbols.py @@ -15,7 +15,7 @@ from concurrent import futures from datetime import datetime from random import shuffle -from helpers import AbiDebug +from abi.helpers import AbiDebug class SystemSymbols: """Stores arguments for the class and initialize class vars""" diff --git a/tools/lib/python/kdoc/__init__.py b/tools/lib/python/kdoc/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 1fd8d17edb32..562cdf5261c3 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -13,8 +13,8 @@ import logging import os import re -from kdoc_parser import KernelDoc -from kdoc_output import OutputFormat +from kdoc.kdoc_parser import KernelDoc +from kdoc.kdoc_output import OutputFormat class GlobSourceFiles: diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 58f115059e93..14378953301b 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -19,8 +19,8 @@ import os import re from datetime import datetime -from kdoc_parser import KernelDoc, type_param -from kdoc_re import KernRe +from kdoc.kdoc_parser import KernelDoc, type_param +from kdoc.kdoc_re import KernRe function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index f7dbb0868367..c0cc714d4d6f 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -16,8 +16,8 @@ import sys import re from pprint import pformat -from kdoc_re import NestedMatch, KernRe -from kdoc_item import KdocItem +from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_item import KdocItem # # Regular expressions used to parse kernel-doc markups at KernelDoc class. -- cgit v1.2.3 From 802774d8539fa73487190ec45438777a3c38d424 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 19 Jan 2026 13:04:57 +0100 Subject: docs: kdoc: avoid error_count overflows The glibc library limits the return code to 8 bits. We need to stick to this limit when using sys.exit(error_count). Signed-off-by: Mauro Carvalho Chehab Cc: stable@vger.kernel.org Signed-off-by: Jonathan Corbet Message-ID: <233d1674db99ed8feb405a2f781de350f0fba0ac.1768823489.git.mchehab+huawei@kernel.org> --- scripts/kernel-doc.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'scripts/kernel-doc.py') diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index 7a1eaf986bcd..1ebb16b9bb08 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -116,6 +116,8 @@ SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) +WERROR_RETURN_CODE = 3 + DESC = """ Read C language source or header FILEs, extract embedded documentation comments, and print formatted documentation to standard output. @@ -176,7 +178,21 @@ class MsgFormatter(logging.Formatter): return logging.Formatter.format(self, record) def main(): - """Main program""" + """ + Main program. + + By default, the return value is: + + - 0: success or Python version is not compatible with + kernel-doc. If -Werror is not used, it will also + return 0 if there are issues at kernel-doc markups; + + - 1: an abnormal condition happened; + + - 2: argparse issued an error; + + - 3: -Werror is used, and one or more unfiltered parse warnings happened. + """ parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, description=DESC) @@ -323,16 +339,12 @@ def main(): if args.werror: print("%s warnings as errors" % error_count) # pylint: disable=C0209 - sys.exit(error_count) + sys.exit(WERROR_RETURN_CODE) if args.verbose: print("%s errors" % error_count) # pylint: disable=C0209 - if args.none: - sys.exit(0) - - sys.exit(error_count) - + sys.exit(0) # Call main method if __name__ == "__main__": -- cgit v1.2.3 From bd28e99720f3f070e80551b5e94695640d3a730a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 19 Jan 2026 13:04:58 +0100 Subject: docs: kdoc: ensure that comments are using our coding style Along kernel-doc libs, we opted to have all comments starting/ending with a blank comment line. Use the same style here. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <50e430acd333a500719205e80ab3b2d297edcd7d.1768823489.git.mchehab+huawei@kernel.org> --- scripts/kernel-doc.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'scripts/kernel-doc.py') diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index 1ebb16b9bb08..f1f3f56edeb5 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -3,7 +3,7 @@ # Copyright(c) 2025: Mauro Carvalho Chehab . # # pylint: disable=C0103,R0912,R0914,R0915 - +# # NOTE: While kernel-doc requires at least version 3.6 to run, the # command line should work with Python 3.2+ (tested with 3.4). # The rationale is that it shall fail gracefully during Kernel @@ -12,7 +12,7 @@ # - no f-strings can be used on this file. # - the libraries that require newer versions can only be included # after Python version is checked. - +# # Converted from the kernel-doc script originally written in Perl # under GPLv2, copyrighted since 1998 by the following authors: # @@ -197,8 +197,9 @@ def main(): parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, description=DESC) + # # Normal arguments - + # parser.add_argument("-v", "-verbose", "--verbose", action="store_true", help="Verbose output, more warnings and other information.") @@ -213,8 +214,9 @@ def main(): action="store_true", help="Enable line number output (only in ReST mode)") + # # Arguments to control the warning behavior - + # parser.add_argument("-Wreturn", "--wreturn", action="store_true", help="Warns about the lack of a return markup on functions.") @@ -235,8 +237,9 @@ def main(): parser.add_argument("-export-file", "--export-file", action='append', help=EXPORT_FILE_DESC) + # # Output format mutually-exclusive group - + # out_group = parser.add_argument_group("Output format selection (mutually exclusive)") out_fmt = out_group.add_mutually_exclusive_group() @@ -248,8 +251,9 @@ def main(): out_fmt.add_argument("-N", "-none", "--none", action="store_true", help="Do not output documentation, only warnings.") + # # Output selection mutually-exclusive group - + # sel_group = parser.add_argument_group("Output selection (mutually exclusive)") sel_mut = sel_group.add_mutually_exclusive_group() @@ -262,7 +266,9 @@ def main(): sel_mut.add_argument("-s", "-function", "--symbol", action='append', help=FUNCTION_DESC) + # # Those are valid for all 3 types of filter + # parser.add_argument("-n", "-nosymbol", "--nosymbol", action='append', help=NOSYMBOL_DESC) @@ -295,9 +301,11 @@ def main(): python_ver = sys.version_info[:2] if python_ver < (3,6): + # # Depending on Kernel configuration, kernel-doc --none is called at # build time. As we don't want to break compilation due to the # usage of an old Python version, return 0 here. + # if args.none: logger.error("Python 3.6 or later is required by kernel-doc. skipping checks") sys.exit(0) @@ -307,7 +315,9 @@ def main(): if python_ver < (3,7): logger.warning("Python 3.7 or later is required for correct results") + # # Import kernel-doc libraries only after checking Python version + # from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 from kdoc.kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 @@ -346,6 +356,8 @@ def main(): sys.exit(0) +# # Call main method +# if __name__ == "__main__": main() -- cgit v1.2.3 From 6cc45ee5df3bd89c8fa62aa2be91f747938686a1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 19 Jan 2026 13:04:59 +0100 Subject: docs: kdoc: some fixes to kernel-doc comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some typos and English errors in the comments of kernel‑doc.py. Locate them with the help of an LLM (gpt‑oss 14B), executed locally with this prompt: review English grammar and syntax at the comments on the code below: While LLM worked fine for the task of doing an English grammar review for strings, being able to distinguish them from the actual code, it was not is perfect: some things required manual work to fix. - While here, replace: "/**" with: ``/**`` As, if we ever rename this script to kernel_doc.py and add it to Sphinx ext autodoc, we want to avoid this warning: scripts/kernel_doc.py:docstring of kernel_doc:10: WARNING: Inline strong start-string without end-string. [docutils] Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- scripts/kernel-doc.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'scripts/kernel-doc.py') diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index f1f3f56edeb5..4e3b9cfe3fd7 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -9,9 +9,9 @@ # The rationale is that it shall fail gracefully during Kernel # compilation with older Kernel versions. Due to that: # - encoding line is needed here; -# - no f-strings can be used on this file. -# - the libraries that require newer versions can only be included -# after Python version is checked. +# - f-strings cannot be used in this file. +# - libraries that require newer versions can only be included +# after the Python version has been checked. # # Converted from the kernel-doc script originally written in Perl # under GPLv2, copyrighted since 1998 by the following authors: @@ -88,16 +88,13 @@ # Yujie Liu """ -kernel_doc -========== - -Print formatted kernel documentation to stdout +Print formatted kernel documentation to stdout. Read C language source or header FILEs, extract embedded documentation comments, and print formatted documentation to standard output. -The documentation comments are identified by the "/**" +The documentation comments are identified by the ``/**`` opening comment mark. See Documentation/doc-guide/kernel-doc.rst for the @@ -134,13 +131,13 @@ May be used multiple times. """ EXPORT_DESC = """ -Only output documentation for the symbols that have been +Only output documentation for symbols that have been exported using EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. """ INTERNAL_DESC = """ -Only output documentation for the symbols that have NOT been +Only output documentation for symbols that have NOT been exported using EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. """ @@ -163,7 +160,7 @@ Header and C source files to be parsed. """ WARN_CONTENTS_BEFORE_SECTIONS_DESC = """ -Warns if there are contents before sections (deprecated). +Warn if there are contents before sections (deprecated). This option is kept just for backward-compatibility, but it does nothing, neither here nor at the original Perl script. @@ -171,7 +168,7 @@ neither here nor at the original Perl script. class MsgFormatter(logging.Formatter): - """Helper class to format warnings on a similar way to kernel-doc.pl""" + """Helper class to format warnings in a similar way to kernel-doc.pl.""" def format(self, record): record.levelname = record.levelname.capitalize() @@ -273,7 +270,7 @@ def main(): help=NOSYMBOL_DESC) parser.add_argument("-D", "-no-doc-sections", "--no-doc-sections", - action='store_true', help="Don't outputt DOC sections") + action='store_true', help="Don't output DOC sections") parser.add_argument("files", metavar="FILE", nargs="+", help=FILES_DESC) @@ -302,12 +299,12 @@ def main(): python_ver = sys.version_info[:2] if python_ver < (3,6): # - # Depending on Kernel configuration, kernel-doc --none is called at + # Depending on the Kernel configuration, kernel-doc --none is called at # build time. As we don't want to break compilation due to the # usage of an old Python version, return 0 here. # if args.none: - logger.error("Python 3.6 or later is required by kernel-doc. skipping checks") + logger.error("Python 3.6 or later is required by kernel-doc. Skipping checks") sys.exit(0) sys.exit("Python 3.6 or later is required by kernel-doc. Aborting.") @@ -316,7 +313,7 @@ def main(): logger.warning("Python 3.7 or later is required for correct results") # - # Import kernel-doc libraries only after checking Python version + # Import kernel-doc libraries only after checking the Python version # from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 from kdoc.kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 -- cgit v1.2.3 From eba6ffd126cd52358181ed5a179644a161f9c65f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 19 Jan 2026 13:05:01 +0100 Subject: docs: kdoc: move kernel-doc to tools/docs kernel-doc is the last documentation-related tool still living outside of the tools/docs directory; the time has come to move it over. [mchehab: fixed kdoc lib location] Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <311d17e403524349940a8b12de6b5e91e554b1f4.1768823489.git.mchehab+huawei@kernel.org> --- Documentation/conf.py | 2 +- Documentation/doc-guide/kernel-doc.rst | 8 +- Documentation/kbuild/kbuild.rst | 2 +- Documentation/process/coding-style.rst | 2 +- .../translations/it_IT/doc-guide/kernel-doc.rst | 8 +- .../translations/sp_SP/process/coding-style.rst | 2 +- .../translations/zh_CN/doc-guide/kernel-doc.rst | 10 +- Documentation/translations/zh_CN/kbuild/kbuild.rst | 2 +- .../translations/zh_CN/process/coding-style.rst | 2 +- .../translations/zh_TW/process/coding-style.rst | 2 +- MAINTAINERS | 2 - Makefile | 2 +- drivers/gpu/drm/i915/Makefile | 2 +- scripts/kernel-doc | 1 - scripts/kernel-doc.py | 360 --------------------- tools/docs/find-unused-docs.sh | 2 +- tools/docs/kernel-doc | 360 +++++++++++++++++++++ tools/docs/sphinx-build-wrapper | 2 +- 18 files changed, 384 insertions(+), 387 deletions(-) delete mode 120000 scripts/kernel-doc delete mode 100755 scripts/kernel-doc.py create mode 100755 tools/docs/kernel-doc (limited to 'scripts/kernel-doc.py') diff --git a/Documentation/conf.py b/Documentation/conf.py index 16d025af1f30..652be9221246 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -585,7 +585,7 @@ pdf_documents = [ # kernel-doc extension configuration for running Sphinx directly (e.g. by Read # the Docs). In a normal build, these are supplied from the Makefile via command # line arguments. -kerneldoc_bin = "../scripts/kernel-doc.py" +kerneldoc_bin = "../tools/docs/kernel-doc" # Not used now kerneldoc_srctree = ".." def setup(app): diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst index b56128d7f5c3..8d2c09fb36e4 100644 --- a/Documentation/doc-guide/kernel-doc.rst +++ b/Documentation/doc-guide/kernel-doc.rst @@ -54,7 +54,7 @@ Running the ``kernel-doc`` tool with increased verbosity and without actual output generation may be used to verify proper formatting of the documentation comments. For example:: - scripts/kernel-doc -v -none drivers/foo/bar.c + tools/docs/kernel-doc -v -none drivers/foo/bar.c The documentation format of ``.c`` files is also verified by the kernel build when it is requested to perform extra gcc checks:: @@ -365,7 +365,7 @@ differentiated by whether the macro name is immediately followed by a left parenthesis ('(') for function-like macros or not followed by one for object-like macros. -Function-like macros are handled like functions by ``scripts/kernel-doc``. +Function-like macros are handled like functions by ``tools/docs/kernel-doc``. They may have a parameter list. Object-like macros have do not have a parameter list. @@ -596,8 +596,8 @@ from the source file. The kernel-doc extension is included in the kernel source tree, at ``Documentation/sphinx/kerneldoc.py``. Internally, it uses the -``scripts/kernel-doc`` script to extract the documentation comments from the -source. +``tools/docs/kernel-doc`` script to extract the documentation comments from +the source. .. _kernel_doc: diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index 82826b0332df..5a9013bacfb7 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -180,7 +180,7 @@ architecture. KDOCFLAGS --------- Specify extra (warning/error) flags for kernel-doc checks during the build, -see scripts/kernel-doc for which flags are supported. Note that this doesn't +see tools/docs/kernel-doc for which flags are supported. Note that this doesn't (currently) apply to documentation builds. ARCH diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index 258158637f65..35b381230f6e 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -614,7 +614,7 @@ it. When commenting the kernel API functions, please use the kernel-doc format. See the files at :ref:`Documentation/doc-guide/ ` and -``scripts/kernel-doc`` for details. Note that the danger of over-commenting +``tools/docs/kernel-doc`` for details. Note that the danger of over-commenting applies to kernel-doc comments all the same. Do not add boilerplate kernel-doc which simply reiterates what's obvious from the signature of the function. diff --git a/Documentation/translations/it_IT/doc-guide/kernel-doc.rst b/Documentation/translations/it_IT/doc-guide/kernel-doc.rst index aa0e31d353d6..bac959b8b7b9 100644 --- a/Documentation/translations/it_IT/doc-guide/kernel-doc.rst +++ b/Documentation/translations/it_IT/doc-guide/kernel-doc.rst @@ -80,7 +80,7 @@ Al fine di verificare che i commenti siano formattati correttamente, potete eseguire il programma ``kernel-doc`` con un livello di verbosità alto e senza che questo produca alcuna documentazione. Per esempio:: - scripts/kernel-doc -v -none drivers/foo/bar.c + tools/docs/kernel-doc -v -none drivers/foo/bar.c Il formato della documentazione è verificato della procedura di generazione del kernel quando viene richiesto di effettuare dei controlli extra con GCC:: @@ -378,7 +378,7 @@ distinguono in base al fatto che il nome della macro simile a funzione sia immediatamente seguito da una parentesi sinistra ('(') mentre in quelle simili a oggetti no. -Le macro simili a funzioni sono gestite come funzioni da ``scripts/kernel-doc``. +Le macro simili a funzioni sono gestite come funzioni da ``tools/docs/kernel-doc``. Possono avere un elenco di parametri. Le macro simili a oggetti non hanno un elenco di parametri. @@ -595,7 +595,7 @@ documentazione presenti nel file sorgente (*source*). L'estensione kernel-doc fa parte dei sorgenti del kernel, la si può trovare in ``Documentation/sphinx/kerneldoc.py``. Internamente, viene utilizzato -lo script ``scripts/kernel-doc`` per estrarre i commenti di documentazione +lo script ``tools/docs/kernel-doc`` per estrarre i commenti di documentazione dai file sorgenti. Come utilizzare kernel-doc per generare pagine man @@ -604,4 +604,4 @@ Come utilizzare kernel-doc per generare pagine man Se volete utilizzare kernel-doc solo per generare delle pagine man, potete farlo direttamente dai sorgenti del kernel:: - $ scripts/kernel-doc -man $(git grep -l '/\*\*' -- :^Documentation :^tools) | scripts/split-man.pl /tmp/man + $ tools/docs/kernel-doc -man $(git grep -l '/\*\*' -- :^Documentation :^tools) | scripts/split-man.pl /tmp/man diff --git a/Documentation/translations/sp_SP/process/coding-style.rst b/Documentation/translations/sp_SP/process/coding-style.rst index 025223be9706..7d63aa8426e6 100644 --- a/Documentation/translations/sp_SP/process/coding-style.rst +++ b/Documentation/translations/sp_SP/process/coding-style.rst @@ -633,7 +633,7 @@ posiblemente POR QUÉ hace esto. Al comentar las funciones de la API del kernel, utilice el formato kernel-doc. Consulte los archivos en :ref:`Documentation/doc-guide/ ` -y ``scripts/kernel-doc`` para más detalles. +y ``tools/docs/kernel-doc`` para más detalles. El estilo preferido para comentarios largos (de varias líneas) es: diff --git a/Documentation/translations/zh_CN/doc-guide/kernel-doc.rst b/Documentation/translations/zh_CN/doc-guide/kernel-doc.rst index ccfb9b8329c2..fb2bbaaa85c1 100644 --- a/Documentation/translations/zh_CN/doc-guide/kernel-doc.rst +++ b/Documentation/translations/zh_CN/doc-guide/kernel-doc.rst @@ -43,7 +43,7 @@ kernel-doc注释用 ``/**`` 作为开始标记。 ``kernel-doc`` 工具将提取 用详细模式和不生成实际输出来运行 ``kernel-doc`` 工具,可以验证文档注释的格式 是否正确。例如:: - scripts/kernel-doc -v -none drivers/foo/bar.c + tools/docs/kernel-doc -v -none drivers/foo/bar.c 当请求执行额外的gcc检查时,内核构建将验证文档格式:: @@ -473,7 +473,7 @@ doc: *title* 如果没有选项,kernel-doc指令将包含源文件中的所有文档注释。 kernel-doc扩展包含在内核源代码树中,位于 ``Documentation/sphinx/kerneldoc.py`` 。 -在内部,它使用 ``scripts/kernel-doc`` 脚本从源代码中提取文档注释。 +在内部,它使用 ``tools/docs/kernel-doc`` 脚本从源代码中提取文档注释。 .. _kernel_doc_zh: @@ -482,18 +482,18 @@ kernel-doc扩展包含在内核源代码树中,位于 ``Documentation/sphinx/k 如果您只想使用kernel-doc生成手册页,可以从内核git树这样做:: - $ scripts/kernel-doc -man \ + $ tools/docs/kernel-doc -man \ $(git grep -l '/\*\*' -- :^Documentation :^tools) \ | scripts/split-man.pl /tmp/man 一些旧版本的git不支持路径排除语法的某些变体。 以下命令之一可能适用于这些版本:: - $ scripts/kernel-doc -man \ + $ tools/docs/kernel-doc -man \ $(git grep -l '/\*\*' -- . ':!Documentation' ':!tools') \ | scripts/split-man.pl /tmp/man - $ scripts/kernel-doc -man \ + $ tools/docs/kernel-doc -man \ $(git grep -l '/\*\*' -- . ":(exclude)Documentation" ":(exclude)tools") \ | scripts/split-man.pl /tmp/man diff --git a/Documentation/translations/zh_CN/kbuild/kbuild.rst b/Documentation/translations/zh_CN/kbuild/kbuild.rst index 57f5cf5b2cdd..a477b4b08958 100644 --- a/Documentation/translations/zh_CN/kbuild/kbuild.rst +++ b/Documentation/translations/zh_CN/kbuild/kbuild.rst @@ -174,7 +174,7 @@ UTS_MACHINE 变量(在某些架构中还包括内核配置)来猜测正确 KDOCFLAGS --------- 指定在构建过程中用于 kernel-doc 检查的额外(警告/错误)标志,查看 -scripts/kernel-doc 了解支持的标志。请注意,这目前不适用于文档构建。 +tools/docs/kernel-doc 了解支持的标志。请注意,这目前不适用于文档构建。 ARCH ---- diff --git a/Documentation/translations/zh_CN/process/coding-style.rst b/Documentation/translations/zh_CN/process/coding-style.rst index 0484d0c65c25..5a342a024c01 100644 --- a/Documentation/translations/zh_CN/process/coding-style.rst +++ b/Documentation/translations/zh_CN/process/coding-style.rst @@ -545,7 +545,7 @@ Linux 里这是提倡的做法,因为这样可以很简单的给读者提供 也可以加上它做这些事情的原因。 当注释内核 API 函数时,请使用 kernel-doc 格式。详见 -Documentation/translations/zh_CN/doc-guide/index.rst 和 scripts/kernel-doc 。 +Documentation/translations/zh_CN/doc-guide/index.rst 和 tools/docs/kernel-doc 。 长 (多行) 注释的首选风格是: diff --git a/Documentation/translations/zh_TW/process/coding-style.rst b/Documentation/translations/zh_TW/process/coding-style.rst index 311c6f6bad0b..e2ba97b3d8bb 100644 --- a/Documentation/translations/zh_TW/process/coding-style.rst +++ b/Documentation/translations/zh_TW/process/coding-style.rst @@ -548,7 +548,7 @@ Linux 裏這是提倡的做法,因爲這樣可以很簡單的給讀者提供 也可以加上它做這些事情的原因。 當註釋內核 API 函數時,請使用 kernel-doc 格式。詳見 -Documentation/translations/zh_CN/doc-guide/index.rst 和 scripts/kernel-doc 。 +Documentation/translations/zh_CN/doc-guide/index.rst 和 tools/docs/kernel-doc 。 長 (多行) 註釋的首選風格是: diff --git a/MAINTAINERS b/MAINTAINERS index 02ec226dd571..d009e2da2215 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7523,7 +7523,6 @@ S: Maintained P: Documentation/doc-guide/maintainer-profile.rst T: git git://git.lwn.net/linux.git docs-next F: Documentation/ -F: scripts/kernel-doc* F: tools/lib/python/* F: tools/docs/ F: tools/net/ynl/pyynl/lib/doc_generator.py @@ -7561,7 +7560,6 @@ M: Mauro Carvalho Chehab L: linux-doc@vger.kernel.org S: Maintained F: Documentation/sphinx/ -F: scripts/kernel-doc* F: tools/lib/python/* F: tools/docs/ diff --git a/Makefile b/Makefile index 3cd00b62cde9..81a4ab11256c 100644 --- a/Makefile +++ b/Makefile @@ -460,7 +460,7 @@ HOSTPKG_CONFIG = pkg-config # the KERNELDOC macro needs to be exported, as scripts/Makefile.build # has a logic to call it -KERNELDOC = $(srctree)/scripts/kernel-doc.py +KERNELDOC = $(srctree)/tools/docs/kernel-doc export KERNELDOC KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4db24050edb0..c979c579de66 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -443,7 +443,7 @@ always-$(CONFIG_DRM_I915_WERROR) += \ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = $(CC) $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; \ - $(srctree)/scripts/kernel-doc -none -Werror $<; touch $@ + $(KERNELDOC) -none -Werror $<; touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) diff --git a/scripts/kernel-doc b/scripts/kernel-doc deleted file mode 120000 index 3b6ef807791a..000000000000 --- a/scripts/kernel-doc +++ /dev/null @@ -1 +0,0 @@ -kernel-doc.py \ No newline at end of file diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py deleted file mode 100755 index 4e3b9cfe3fd7..000000000000 --- a/scripts/kernel-doc.py +++ /dev/null @@ -1,360 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=C0103,R0912,R0914,R0915 -# -# NOTE: While kernel-doc requires at least version 3.6 to run, the -# command line should work with Python 3.2+ (tested with 3.4). -# The rationale is that it shall fail gracefully during Kernel -# compilation with older Kernel versions. Due to that: -# - encoding line is needed here; -# - f-strings cannot be used in this file. -# - libraries that require newer versions can only be included -# after the Python version has been checked. -# -# Converted from the kernel-doc script originally written in Perl -# under GPLv2, copyrighted since 1998 by the following authors: -# -# Aditya Srivastava -# Akira Yokosawa -# Alexander A. Klimov -# Alexander Lobakin -# André Almeida -# Andy Shevchenko -# Anna-Maria Behnsen -# Armin Kuster -# Bart Van Assche -# Ben Hutchings -# Borislav Petkov -# Chen-Yu Tsai -# Coco Li -# Conchúr Navid -# Daniel Santos -# Danilo Cesar Lemes de Paula -# Dan Luedtke -# Donald Hunter -# Gabriel Krisman Bertazi -# Greg Kroah-Hartman -# Harvey Harrison -# Horia Geanta -# Ilya Dryomov -# Jakub Kicinski -# Jani Nikula -# Jason Baron -# Jason Gunthorpe -# Jérémy Bobbio -# Johannes Berg -# Johannes Weiner -# Jonathan Cameron -# Jonathan Corbet -# Jonathan Neuschäfer -# Kamil Rytarowski -# Kees Cook -# Laurent Pinchart -# Levin, Alexander (Sasha Levin) -# Linus Torvalds -# Lucas De Marchi -# Mark Rutland -# Markus Heiser -# Martin Waitz -# Masahiro Yamada -# Matthew Wilcox -# Mauro Carvalho Chehab -# Michal Wajdeczko -# Michael Zucchi -# Mike Rapoport -# Niklas Söderlund -# Nishanth Menon -# Paolo Bonzini -# Pavan Kumar Linga -# Pavel Pisa -# Peter Maydell -# Pierre-Louis Bossart -# Randy Dunlap -# Richard Kennedy -# Rich Walker -# Rolf Eike Beer -# Sakari Ailus -# Silvio Fricke -# Simon Huggins -# Tim Waugh -# Tomasz Warniełło -# Utkarsh Tripathi -# valdis.kletnieks@vt.edu -# Vegard Nossum -# Will Deacon -# Yacine Belkadi -# Yujie Liu - -""" -Print formatted kernel documentation to stdout. - -Read C language source or header FILEs, extract embedded -documentation comments, and print formatted documentation -to standard output. - -The documentation comments are identified by the ``/**`` -opening comment mark. - -See Documentation/doc-guide/kernel-doc.rst for the -documentation comment syntax. -""" - -import argparse -import logging -import os -import sys - -# Import Python modules - -LIB_DIR = "../tools/lib/python" -SRC_DIR = os.path.dirname(os.path.realpath(__file__)) - -sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) - -WERROR_RETURN_CODE = 3 - -DESC = """ -Read C language source or header FILEs, extract embedded documentation comments, -and print formatted documentation to standard output. - -The documentation comments are identified by the "/**" opening comment mark. - -See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. -""" - -EXPORT_FILE_DESC = """ -Specify an additional FILE in which to look for EXPORT_SYMBOL information. - -May be used multiple times. -""" - -EXPORT_DESC = """ -Only output documentation for symbols that have been -exported using EXPORT_SYMBOL() and related macros in any input -FILE or -export-file FILE. -""" - -INTERNAL_DESC = """ -Only output documentation for symbols that have NOT been -exported using EXPORT_SYMBOL() and related macros in any input -FILE or -export-file FILE. -""" - -FUNCTION_DESC = """ -Only output documentation for the given function or DOC: section -title. All other functions and DOC: sections are ignored. - -May be used multiple times. -""" - -NOSYMBOL_DESC = """ -Exclude the specified symbol from the output documentation. - -May be used multiple times. -""" - -FILES_DESC = """ -Header and C source files to be parsed. -""" - -WARN_CONTENTS_BEFORE_SECTIONS_DESC = """ -Warn if there are contents before sections (deprecated). - -This option is kept just for backward-compatibility, but it does nothing, -neither here nor at the original Perl script. -""" - - -class MsgFormatter(logging.Formatter): - """Helper class to format warnings in a similar way to kernel-doc.pl.""" - - def format(self, record): - record.levelname = record.levelname.capitalize() - return logging.Formatter.format(self, record) - -def main(): - """ - Main program. - - By default, the return value is: - - - 0: success or Python version is not compatible with - kernel-doc. If -Werror is not used, it will also - return 0 if there are issues at kernel-doc markups; - - - 1: an abnormal condition happened; - - - 2: argparse issued an error; - - - 3: -Werror is used, and one or more unfiltered parse warnings happened. - """ - - parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, - description=DESC) - - # - # Normal arguments - # - parser.add_argument("-v", "-verbose", "--verbose", action="store_true", - help="Verbose output, more warnings and other information.") - - parser.add_argument("-d", "-debug", "--debug", action="store_true", - help="Enable debug messages") - - parser.add_argument("-M", "-modulename", "--modulename", - default="Kernel API", - help="Allow setting a module name at the output.") - - parser.add_argument("-l", "-enable-lineno", "--enable_lineno", - action="store_true", - help="Enable line number output (only in ReST mode)") - - # - # Arguments to control the warning behavior - # - parser.add_argument("-Wreturn", "--wreturn", action="store_true", - help="Warns about the lack of a return markup on functions.") - - parser.add_argument("-Wshort-desc", "-Wshort-description", "--wshort-desc", - action="store_true", - help="Warns if initial short description is missing") - - parser.add_argument("-Wcontents-before-sections", - "--wcontents-before-sections", action="store_true", - help=WARN_CONTENTS_BEFORE_SECTIONS_DESC) - - parser.add_argument("-Wall", "--wall", action="store_true", - help="Enable all types of warnings") - - parser.add_argument("-Werror", "--werror", action="store_true", - help="Treat warnings as errors.") - - parser.add_argument("-export-file", "--export-file", action='append', - help=EXPORT_FILE_DESC) - - # - # Output format mutually-exclusive group - # - out_group = parser.add_argument_group("Output format selection (mutually exclusive)") - - out_fmt = out_group.add_mutually_exclusive_group() - - out_fmt.add_argument("-m", "-man", "--man", action="store_true", - help="Output troff manual page format.") - out_fmt.add_argument("-r", "-rst", "--rst", action="store_true", - help="Output reStructuredText format (default).") - out_fmt.add_argument("-N", "-none", "--none", action="store_true", - help="Do not output documentation, only warnings.") - - # - # Output selection mutually-exclusive group - # - sel_group = parser.add_argument_group("Output selection (mutually exclusive)") - sel_mut = sel_group.add_mutually_exclusive_group() - - sel_mut.add_argument("-e", "-export", "--export", action='store_true', - help=EXPORT_DESC) - - sel_mut.add_argument("-i", "-internal", "--internal", action='store_true', - help=INTERNAL_DESC) - - sel_mut.add_argument("-s", "-function", "--symbol", action='append', - help=FUNCTION_DESC) - - # - # Those are valid for all 3 types of filter - # - parser.add_argument("-n", "-nosymbol", "--nosymbol", action='append', - help=NOSYMBOL_DESC) - - parser.add_argument("-D", "-no-doc-sections", "--no-doc-sections", - action='store_true', help="Don't output DOC sections") - - parser.add_argument("files", metavar="FILE", - nargs="+", help=FILES_DESC) - - args = parser.parse_args() - - if args.wall: - args.wreturn = True - args.wshort_desc = True - args.wcontents_before_sections = True - - logger = logging.getLogger() - - if not args.debug: - logger.setLevel(logging.INFO) - else: - logger.setLevel(logging.DEBUG) - - formatter = MsgFormatter('%(levelname)s: %(message)s') - - handler = logging.StreamHandler() - handler.setFormatter(formatter) - - logger.addHandler(handler) - - python_ver = sys.version_info[:2] - if python_ver < (3,6): - # - # Depending on the Kernel configuration, kernel-doc --none is called at - # build time. As we don't want to break compilation due to the - # usage of an old Python version, return 0 here. - # - if args.none: - logger.error("Python 3.6 or later is required by kernel-doc. Skipping checks") - sys.exit(0) - - sys.exit("Python 3.6 or later is required by kernel-doc. Aborting.") - - if python_ver < (3,7): - logger.warning("Python 3.7 or later is required for correct results") - - # - # Import kernel-doc libraries only after checking the Python version - # - from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 - from kdoc.kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 - - if args.man: - out_style = ManFormat(modulename=args.modulename) - elif args.none: - out_style = None - else: - out_style = RestFormat() - - kfiles = KernelFiles(verbose=args.verbose, - out_style=out_style, werror=args.werror, - wreturn=args.wreturn, wshort_desc=args.wshort_desc, - wcontents_before_sections=args.wcontents_before_sections) - - kfiles.parse(args.files, export_file=args.export_file) - - for t in kfiles.msg(enable_lineno=args.enable_lineno, export=args.export, - internal=args.internal, symbol=args.symbol, - nosymbol=args.nosymbol, export_file=args.export_file, - no_doc_sections=args.no_doc_sections): - msg = t[1] - if msg: - print(msg) - - error_count = kfiles.errors - if not error_count: - sys.exit(0) - - if args.werror: - print("%s warnings as errors" % error_count) # pylint: disable=C0209 - sys.exit(WERROR_RETURN_CODE) - - if args.verbose: - print("%s errors" % error_count) # pylint: disable=C0209 - - sys.exit(0) - -# -# Call main method -# -if __name__ == "__main__": - main() diff --git a/tools/docs/find-unused-docs.sh b/tools/docs/find-unused-docs.sh index ca4e607ec3f7..53514c759dc1 100755 --- a/tools/docs/find-unused-docs.sh +++ b/tools/docs/find-unused-docs.sh @@ -54,7 +54,7 @@ for file in `find $1 -name '*.c'`; do if [[ ${FILES_INCLUDED[$file]+_} ]]; then continue; fi - str=$(PYTHONDONTWRITEBYTECODE=1 scripts/kernel-doc -export "$file" 2>/dev/null) + str=$(PYTHONDONTWRITEBYTECODE=1 tools/docs/kernel-doc -export "$file" 2>/dev/null) if [[ -n "$str" ]]; then echo "$file" fi diff --git a/tools/docs/kernel-doc b/tools/docs/kernel-doc new file mode 100755 index 000000000000..a19a92566780 --- /dev/null +++ b/tools/docs/kernel-doc @@ -0,0 +1,360 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=C0103,R0912,R0914,R0915 +# +# NOTE: While kernel-doc requires at least version 3.6 to run, the +# command line should work with Python 3.2+ (tested with 3.4). +# The rationale is that it shall fail gracefully during Kernel +# compilation with older Kernel versions. Due to that: +# - encoding line is needed here; +# - f-strings cannot be used in this file. +# - libraries that require newer versions can only be included +# after the Python version has been checked. +# +# Converted from the kernel-doc script originally written in Perl +# under GPLv2, copyrighted since 1998 by the following authors: +# +# Aditya Srivastava +# Akira Yokosawa +# Alexander A. Klimov +# Alexander Lobakin +# André Almeida +# Andy Shevchenko +# Anna-Maria Behnsen +# Armin Kuster +# Bart Van Assche +# Ben Hutchings +# Borislav Petkov +# Chen-Yu Tsai +# Coco Li +# Conchúr Navid +# Daniel Santos +# Danilo Cesar Lemes de Paula +# Dan Luedtke +# Donald Hunter +# Gabriel Krisman Bertazi +# Greg Kroah-Hartman +# Harvey Harrison +# Horia Geanta +# Ilya Dryomov +# Jakub Kicinski +# Jani Nikula +# Jason Baron +# Jason Gunthorpe +# Jérémy Bobbio +# Johannes Berg +# Johannes Weiner +# Jonathan Cameron +# Jonathan Corbet +# Jonathan Neuschäfer +# Kamil Rytarowski +# Kees Cook +# Laurent Pinchart +# Levin, Alexander (Sasha Levin) +# Linus Torvalds +# Lucas De Marchi +# Mark Rutland +# Markus Heiser +# Martin Waitz +# Masahiro Yamada +# Matthew Wilcox +# Mauro Carvalho Chehab +# Michal Wajdeczko +# Michael Zucchi +# Mike Rapoport +# Niklas Söderlund +# Nishanth Menon +# Paolo Bonzini +# Pavan Kumar Linga +# Pavel Pisa +# Peter Maydell +# Pierre-Louis Bossart +# Randy Dunlap +# Richard Kennedy +# Rich Walker +# Rolf Eike Beer +# Sakari Ailus +# Silvio Fricke +# Simon Huggins +# Tim Waugh +# Tomasz Warniełło +# Utkarsh Tripathi +# valdis.kletnieks@vt.edu +# Vegard Nossum +# Will Deacon +# Yacine Belkadi +# Yujie Liu + +""" +Print formatted kernel documentation to stdout. + +Read C language source or header FILEs, extract embedded +documentation comments, and print formatted documentation +to standard output. + +The documentation comments are identified by the ``/**`` +opening comment mark. + +See Documentation/doc-guide/kernel-doc.rst for the +documentation comment syntax. +""" + +import argparse +import logging +import os +import sys + +# Import Python modules + +LIB_DIR = "../lib/python" +SRC_DIR = os.path.dirname(os.path.realpath(__file__)) + +sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) + +WERROR_RETURN_CODE = 3 + +DESC = """ +Read C language source or header FILEs, extract embedded documentation comments, +and print formatted documentation to standard output. + +The documentation comments are identified by the "/**" opening comment mark. + +See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. +""" + +EXPORT_FILE_DESC = """ +Specify an additional FILE in which to look for EXPORT_SYMBOL information. + +May be used multiple times. +""" + +EXPORT_DESC = """ +Only output documentation for symbols that have been +exported using EXPORT_SYMBOL() and related macros in any input +FILE or -export-file FILE. +""" + +INTERNAL_DESC = """ +Only output documentation for symbols that have NOT been +exported using EXPORT_SYMBOL() and related macros in any input +FILE or -export-file FILE. +""" + +FUNCTION_DESC = """ +Only output documentation for the given function or DOC: section +title. All other functions and DOC: sections are ignored. + +May be used multiple times. +""" + +NOSYMBOL_DESC = """ +Exclude the specified symbol from the output documentation. + +May be used multiple times. +""" + +FILES_DESC = """ +Header and C source files to be parsed. +""" + +WARN_CONTENTS_BEFORE_SECTIONS_DESC = """ +Warn if there are contents before sections (deprecated). + +This option is kept just for backward-compatibility, but it does nothing, +neither here nor at the original Perl script. +""" + + +class MsgFormatter(logging.Formatter): + """Helper class to format warnings in a similar way to kernel-doc.pl.""" + + def format(self, record): + record.levelname = record.levelname.capitalize() + return logging.Formatter.format(self, record) + +def main(): + """ + Main program. + + By default, the return value is: + + - 0: success or Python version is not compatible with + kernel-doc. If -Werror is not used, it will also + return 0 if there are issues at kernel-doc markups; + + - 1: an abnormal condition happened; + + - 2: argparse issued an error; + + - 3: -Werror is used, and one or more unfiltered parse warnings happened. + """ + + parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, + description=DESC) + + # + # Normal arguments + # + parser.add_argument("-v", "-verbose", "--verbose", action="store_true", + help="Verbose output, more warnings and other information.") + + parser.add_argument("-d", "-debug", "--debug", action="store_true", + help="Enable debug messages") + + parser.add_argument("-M", "-modulename", "--modulename", + default="Kernel API", + help="Allow setting a module name at the output.") + + parser.add_argument("-l", "-enable-lineno", "--enable_lineno", + action="store_true", + help="Enable line number output (only in ReST mode)") + + # + # Arguments to control the warning behavior + # + parser.add_argument("-Wreturn", "--wreturn", action="store_true", + help="Warns about the lack of a return markup on functions.") + + parser.add_argument("-Wshort-desc", "-Wshort-description", "--wshort-desc", + action="store_true", + help="Warns if initial short description is missing") + + parser.add_argument("-Wcontents-before-sections", + "--wcontents-before-sections", action="store_true", + help=WARN_CONTENTS_BEFORE_SECTIONS_DESC) + + parser.add_argument("-Wall", "--wall", action="store_true", + help="Enable all types of warnings") + + parser.add_argument("-Werror", "--werror", action="store_true", + help="Treat warnings as errors.") + + parser.add_argument("-export-file", "--export-file", action='append', + help=EXPORT_FILE_DESC) + + # + # Output format mutually-exclusive group + # + out_group = parser.add_argument_group("Output format selection (mutually exclusive)") + + out_fmt = out_group.add_mutually_exclusive_group() + + out_fmt.add_argument("-m", "-man", "--man", action="store_true", + help="Output troff manual page format.") + out_fmt.add_argument("-r", "-rst", "--rst", action="store_true", + help="Output reStructuredText format (default).") + out_fmt.add_argument("-N", "-none", "--none", action="store_true", + help="Do not output documentation, only warnings.") + + # + # Output selection mutually-exclusive group + # + sel_group = parser.add_argument_group("Output selection (mutually exclusive)") + sel_mut = sel_group.add_mutually_exclusive_group() + + sel_mut.add_argument("-e", "-export", "--export", action='store_true', + help=EXPORT_DESC) + + sel_mut.add_argument("-i", "-internal", "--internal", action='store_true', + help=INTERNAL_DESC) + + sel_mut.add_argument("-s", "-function", "--symbol", action='append', + help=FUNCTION_DESC) + + # + # Those are valid for all 3 types of filter + # + parser.add_argument("-n", "-nosymbol", "--nosymbol", action='append', + help=NOSYMBOL_DESC) + + parser.add_argument("-D", "-no-doc-sections", "--no-doc-sections", + action='store_true', help="Don't output DOC sections") + + parser.add_argument("files", metavar="FILE", + nargs="+", help=FILES_DESC) + + args = parser.parse_args() + + if args.wall: + args.wreturn = True + args.wshort_desc = True + args.wcontents_before_sections = True + + logger = logging.getLogger() + + if not args.debug: + logger.setLevel(logging.INFO) + else: + logger.setLevel(logging.DEBUG) + + formatter = MsgFormatter('%(levelname)s: %(message)s') + + handler = logging.StreamHandler() + handler.setFormatter(formatter) + + logger.addHandler(handler) + + python_ver = sys.version_info[:2] + if python_ver < (3,6): + # + # Depending on the Kernel configuration, kernel-doc --none is called at + # build time. As we don't want to break compilation due to the + # usage of an old Python version, return 0 here. + # + if args.none: + logger.error("Python 3.6 or later is required by kernel-doc. Skipping checks") + sys.exit(0) + + sys.exit("Python 3.6 or later is required by kernel-doc. Aborting.") + + if python_ver < (3,7): + logger.warning("Python 3.7 or later is required for correct results") + + # + # Import kernel-doc libraries only after checking the Python version + # + from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 + from kdoc.kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 + + if args.man: + out_style = ManFormat(modulename=args.modulename) + elif args.none: + out_style = None + else: + out_style = RestFormat() + + kfiles = KernelFiles(verbose=args.verbose, + out_style=out_style, werror=args.werror, + wreturn=args.wreturn, wshort_desc=args.wshort_desc, + wcontents_before_sections=args.wcontents_before_sections) + + kfiles.parse(args.files, export_file=args.export_file) + + for t in kfiles.msg(enable_lineno=args.enable_lineno, export=args.export, + internal=args.internal, symbol=args.symbol, + nosymbol=args.nosymbol, export_file=args.export_file, + no_doc_sections=args.no_doc_sections): + msg = t[1] + if msg: + print(msg) + + error_count = kfiles.errors + if not error_count: + sys.exit(0) + + if args.werror: + print("%s warnings as errors" % error_count) # pylint: disable=C0209 + sys.exit(WERROR_RETURN_CODE) + + if args.verbose: + print("%s errors" % error_count) # pylint: disable=C0209 + + sys.exit(0) + +# +# Call main method +# +if __name__ == "__main__": + main() diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index 7a5fcef25429..cb2a5005e633 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -246,7 +246,7 @@ class SphinxBuilder: # self.sphinxbuild = os.environ.get("SPHINXBUILD", "sphinx-build") self.kerneldoc = self.get_path(os.environ.get("KERNELDOC", - "scripts/kernel-doc.py")) + "tools/docs/kernel-doc")) self.builddir = self.get_path(builddir, use_cwd=True, abs_path=True) # -- cgit v1.2.3