From 2a14f021210fcbc271591d4c592eb4adca6bf127 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:35 +0200 Subject: scripts/jobserver-exec: move the code to a class Convert the code inside jobserver-exec to a class and properly document it. Using a class allows reusing the jobserver logic on other scripts. While the main code remains unchanged, being compatible with Python 2.6 and 3.0+, its coding style now follows a more modern standard, having tabs replaced by a 4-spaces indent, passing autopep8, black and pylint. The code allows using a pythonic way to enter/exit a python code, e.g. it now supports: with JobserverExec() as jobserver: jobserver.run(sys.argv[1:]) With the new code, the __exit__() function should ensure that the jobserver slot will be closed at the end, even if something bad happens somewhere. Signed-off-by: Mauro Carvalho Chehab Message-ID: <4749921b75d4e0bd85a25d4d94aa2c940fad084e.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/jobserver-exec | 218 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 151 insertions(+), 67 deletions(-) (limited to 'scripts') diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 7eca035472d3..897c0cca9e6e 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -1,77 +1,161 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0+ # +# pylint: disable=C0103,C0209 +# # This determines how many parallel tasks "make" is expecting, as it is # not exposed via an special variables, reserves them all, runs a subprocess # with PARALLELISM environment variable set, and releases the jobs back again. # # https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -from __future__ import print_function -import os, sys, errno + +""" +Interacts with the POSIX jobserver during the Kernel build time. + +A "normal" jobserver task, like the one initiated by a make subrocess would do: + + - open read/write file descriptors to communicate with the job server; + - ask for one slot by calling: + claim = os.read(reader, 1) + - when the job finshes, call: + os.write(writer, b"+") # os.write(writer, claim) + +Here, the goal is different: This script aims to get the remaining number +of slots available, using all of them to run a command which handle tasks in +parallel. To to that, it has a loop that ends only after there are no +slots left. It then increments the number by one, in order to allow a +call equivalent to make -j$((claim+1)), e.g. having a parent make creating +$claim child to do the actual work. + +The end goal here is to keep the total number of build tasks under the +limit established by the initial make -j$n_proc call. +""" + +import errno +import os import subprocess +import sys + + +class JobserverExec: + """ + Claim all slots from make using POSIX Jobserver. + + The main methods here are: + - open(): reserves all slots; + - close(): method returns all used slots back to make; + - run(): executes a command setting PARALLELISM= + """ + + def __init__(self): + """Initialize internal vars""" + self.claim = 0 + self.jobs = b"" + self.reader = None + self.writer = None + self.is_open = False + + def open(self): + """Reserve all available slots to be claimed later on""" + + if self.is_open: + return + + try: + # Fetch the make environment options. + flags = os.environ["MAKEFLAGS"] + # Look for "--jobserver=R,W" + # Note that GNU Make has used --jobserver-fds and --jobserver-auth + # so this handles all of them. + opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] + + # Parse out R,W file descriptor numbers and set them nonblocking. + # If the MAKEFLAGS variable contains multiple instances of the + # --jobserver-auth= option, the last one is relevant. + fds = opts[-1].split("=", 1)[1] + + # Starting with GNU Make 4.4, named pipes are used for reader + # and writer. + # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 + _, _, path = fds.partition("fifo:") + + if path: + self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) + self.writer = os.open(path, os.O_WRONLY) + else: + self.reader, self.writer = [int(x) for x in fds.split(",", 1)] + # Open a private copy of reader to avoid setting nonblocking + # on an unexpecting process with the same reader fd. + self.reader = os.open("/proc/self/fd/%d" % (self.reader), + os.O_RDONLY | os.O_NONBLOCK) + + # Read out as many jobserver slots as possible + while True: + try: + slot = os.read(self.reader, 8) + self.jobs += slot + except (OSError, IOError) as e: + if e.errno == errno.EWOULDBLOCK: + # Stop at the end of the jobserver queue. + break + # If something went wrong, give back the jobs. + if self.jobs: + os.write(self.writer, self.jobs) + raise e + + # Add a bump for our caller's reserveration, since we're just going + # to sit here blocked on our child. + self.claim = len(self.jobs) + 1 + + except (KeyError, IndexError, ValueError, OSError, IOError): + # Any missing environment strings or bad fds should result in just + # not being parallel. + self.claim = None + + self.is_open = True + + def close(self): + """Return all reserved slots to Jobserver""" + + if not self.is_open: + return + + # Return all the reserved slots. + if len(self.jobs): + os.write(self.writer, self.jobs) + + self.is_open = False + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.close() + + def run(self, cmd): + """ + Run a command setting PARALLELISM env variable to the number of + available job slots (claim) + 1, e.g. it will reserve claim slots + to do the actual build work, plus one to monitor its children. + """ + self.open() # Ensure that self.claim is set + + # We can only claim parallelism if there was a jobserver (i.e. a + # top-level "-jN" argument) and there were no other failures. Otherwise + # leave out the environment variable and let the child figure out what + # is best. + if self.claim: + os.environ["PARALLELISM"] = str(self.claim) + + return subprocess.call(cmd) + + +def main(): + """Main program""" + with JobserverExec() as jobserver: + jobserver.run(sys.argv[1:]) + -# Extract and prepare jobserver file descriptors from environment. -claim = 0 -jobs = b"" -try: - # Fetch the make environment options. - flags = os.environ['MAKEFLAGS'] - - # Look for "--jobserver=R,W" - # Note that GNU Make has used --jobserver-fds and --jobserver-auth - # so this handles all of them. - opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] - - # Parse out R,W file descriptor numbers and set them nonblocking. - # If the MAKEFLAGS variable contains multiple instances of the - # --jobserver-auth= option, the last one is relevant. - fds = opts[-1].split("=", 1)[1] - - # Starting with GNU Make 4.4, named pipes are used for reader and writer. - # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 - _, _, path = fds.partition('fifo:') - - if path: - reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) - writer = os.open(path, os.O_WRONLY) - else: - reader, writer = [int(x) for x in fds.split(",", 1)] - # Open a private copy of reader to avoid setting nonblocking - # on an unexpecting process with the same reader fd. - reader = os.open("/proc/self/fd/%d" % (reader), - os.O_RDONLY | os.O_NONBLOCK) - - # Read out as many jobserver slots as possible. - while True: - try: - slot = os.read(reader, 8) - jobs += slot - except (OSError, IOError) as e: - if e.errno == errno.EWOULDBLOCK: - # Stop at the end of the jobserver queue. - break - # If something went wrong, give back the jobs. - if len(jobs): - os.write(writer, jobs) - raise e - # Add a bump for our caller's reserveration, since we're just going - # to sit here blocked on our child. - claim = len(jobs) + 1 -except (KeyError, IndexError, ValueError, OSError, IOError) as e: - # Any missing environment strings or bad fds should result in just - # not being parallel. - pass - -# We can only claim parallelism if there was a jobserver (i.e. a top-level -# "-jN" argument) and there were no other failures. Otherwise leave out the -# environment variable and let the child figure out what is best. -if claim > 0: - os.environ['PARALLELISM'] = '%d' % (claim) - -rc = subprocess.call(sys.argv[1:]) - -# Return all the reserved slots. -if len(jobs): - os.write(writer, jobs) - -sys.exit(rc) +if __name__ == "__main__": + main() -- cgit v1.2.3 From fce6df7e7384ba82ea718b14974f33c1b697cf18 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:36 +0200 Subject: scripts/jobserver-exec: move its class to the lib directory To make it easier to be re-used, move the JobserverExec class to the library directory. Signed-off-by: Mauro Carvalho Chehab Message-ID: <6be7b161b6c005a9807162ebfd239af6a4e6fa47.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/jobserver-exec | 152 ++++------------------------------------------- scripts/lib/jobserver.py | 149 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 141 deletions(-) create mode 100755 scripts/lib/jobserver.py (limited to 'scripts') diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 897c0cca9e6e..40a0f0058733 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -1,155 +1,25 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0+ -# -# pylint: disable=C0103,C0209 -# -# This determines how many parallel tasks "make" is expecting, as it is -# not exposed via an special variables, reserves them all, runs a subprocess -# with PARALLELISM environment variable set, and releases the jobs back again. -# -# https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -""" -Interacts with the POSIX jobserver during the Kernel build time. - -A "normal" jobserver task, like the one initiated by a make subrocess would do: - - - open read/write file descriptors to communicate with the job server; - - ask for one slot by calling: - claim = os.read(reader, 1) - - when the job finshes, call: - os.write(writer, b"+") # os.write(writer, claim) - -Here, the goal is different: This script aims to get the remaining number -of slots available, using all of them to run a command which handle tasks in -parallel. To to that, it has a loop that ends only after there are no -slots left. It then increments the number by one, in order to allow a -call equivalent to make -j$((claim+1)), e.g. having a parent make creating -$claim child to do the actual work. - -The end goal here is to keep the total number of build tasks under the -limit established by the initial make -j$n_proc call. -""" - -import errno import os -import subprocess import sys +LIB_DIR = "lib" +SRC_DIR = os.path.dirname(os.path.realpath(__file__)) -class JobserverExec: - """ - Claim all slots from make using POSIX Jobserver. - - The main methods here are: - - open(): reserves all slots; - - close(): method returns all used slots back to make; - - run(): executes a command setting PARALLELISM= - """ - - def __init__(self): - """Initialize internal vars""" - self.claim = 0 - self.jobs = b"" - self.reader = None - self.writer = None - self.is_open = False - - def open(self): - """Reserve all available slots to be claimed later on""" - - if self.is_open: - return - - try: - # Fetch the make environment options. - flags = os.environ["MAKEFLAGS"] - # Look for "--jobserver=R,W" - # Note that GNU Make has used --jobserver-fds and --jobserver-auth - # so this handles all of them. - opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] - - # Parse out R,W file descriptor numbers and set them nonblocking. - # If the MAKEFLAGS variable contains multiple instances of the - # --jobserver-auth= option, the last one is relevant. - fds = opts[-1].split("=", 1)[1] - - # Starting with GNU Make 4.4, named pipes are used for reader - # and writer. - # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 - _, _, path = fds.partition("fifo:") +sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) - if path: - self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) - self.writer = os.open(path, os.O_WRONLY) - else: - self.reader, self.writer = [int(x) for x in fds.split(",", 1)] - # Open a private copy of reader to avoid setting nonblocking - # on an unexpecting process with the same reader fd. - self.reader = os.open("/proc/self/fd/%d" % (self.reader), - os.O_RDONLY | os.O_NONBLOCK) +from jobserver import JobserverExec # pylint: disable=C0415 - # Read out as many jobserver slots as possible - while True: - try: - slot = os.read(self.reader, 8) - self.jobs += slot - except (OSError, IOError) as e: - if e.errno == errno.EWOULDBLOCK: - # Stop at the end of the jobserver queue. - break - # If something went wrong, give back the jobs. - if self.jobs: - os.write(self.writer, self.jobs) - raise e - # Add a bump for our caller's reserveration, since we're just going - # to sit here blocked on our child. - self.claim = len(self.jobs) + 1 - - except (KeyError, IndexError, ValueError, OSError, IOError): - # Any missing environment strings or bad fds should result in just - # not being parallel. - self.claim = None - - self.is_open = True - - def close(self): - """Return all reserved slots to Jobserver""" - - if not self.is_open: - return - - # Return all the reserved slots. - if len(self.jobs): - os.write(self.writer, self.jobs) - - self.is_open = False - - def __enter__(self): - self.open() - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - self.close() - - def run(self, cmd): - """ - Run a command setting PARALLELISM env variable to the number of - available job slots (claim) + 1, e.g. it will reserve claim slots - to do the actual build work, plus one to monitor its children. - """ - self.open() # Ensure that self.claim is set - - # We can only claim parallelism if there was a jobserver (i.e. a - # top-level "-jN" argument) and there were no other failures. Otherwise - # leave out the environment variable and let the child figure out what - # is best. - if self.claim: - os.environ["PARALLELISM"] = str(self.claim) - - return subprocess.call(cmd) +""" +Determines how many parallel tasks "make" is expecting, as it is +not exposed via an special variables, reserves them all, runs a subprocess +with PARALLELISM environment variable set, and releases the jobs back again. +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" def main(): """Main program""" diff --git a/scripts/lib/jobserver.py b/scripts/lib/jobserver.py new file mode 100755 index 000000000000..a24f30ef4fa8 --- /dev/null +++ b/scripts/lib/jobserver.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0+ +# +# pylint: disable=C0103,C0209 +# +# + +""" +Interacts with the POSIX jobserver during the Kernel build time. + +A "normal" jobserver task, like the one initiated by a make subrocess would do: + + - open read/write file descriptors to communicate with the job server; + - ask for one slot by calling: + claim = os.read(reader, 1) + - when the job finshes, call: + os.write(writer, b"+") # os.write(writer, claim) + +Here, the goal is different: This script aims to get the remaining number +of slots available, using all of them to run a command which handle tasks in +parallel. To to that, it has a loop that ends only after there are no +slots left. It then increments the number by one, in order to allow a +call equivalent to make -j$((claim+1)), e.g. having a parent make creating +$claim child to do the actual work. + +The end goal here is to keep the total number of build tasks under the +limit established by the initial make -j$n_proc call. + +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" + +import errno +import os +import subprocess +import sys + +class JobserverExec: + """ + Claim all slots from make using POSIX Jobserver. + + The main methods here are: + - open(): reserves all slots; + - close(): method returns all used slots back to make; + - run(): executes a command setting PARALLELISM= + """ + + def __init__(self): + """Initialize internal vars""" + self.claim = 0 + self.jobs = b"" + self.reader = None + self.writer = None + self.is_open = False + + def open(self): + """Reserve all available slots to be claimed later on""" + + if self.is_open: + return + + try: + # Fetch the make environment options. + flags = os.environ["MAKEFLAGS"] + # Look for "--jobserver=R,W" + # Note that GNU Make has used --jobserver-fds and --jobserver-auth + # so this handles all of them. + opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] + + # Parse out R,W file descriptor numbers and set them nonblocking. + # If the MAKEFLAGS variable contains multiple instances of the + # --jobserver-auth= option, the last one is relevant. + fds = opts[-1].split("=", 1)[1] + + # Starting with GNU Make 4.4, named pipes are used for reader + # and writer. + # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 + _, _, path = fds.partition("fifo:") + + if path: + self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) + self.writer = os.open(path, os.O_WRONLY) + else: + self.reader, self.writer = [int(x) for x in fds.split(",", 1)] + # Open a private copy of reader to avoid setting nonblocking + # on an unexpecting process with the same reader fd. + self.reader = os.open("/proc/self/fd/%d" % (self.reader), + os.O_RDONLY | os.O_NONBLOCK) + + # Read out as many jobserver slots as possible + while True: + try: + slot = os.read(self.reader, 8) + self.jobs += slot + except (OSError, IOError) as e: + if e.errno == errno.EWOULDBLOCK: + # Stop at the end of the jobserver queue. + break + # If something went wrong, give back the jobs. + if self.jobs: + os.write(self.writer, self.jobs) + raise e + + # Add a bump for our caller's reserveration, since we're just going + # to sit here blocked on our child. + self.claim = len(self.jobs) + 1 + + except (KeyError, IndexError, ValueError, OSError, IOError): + # Any missing environment strings or bad fds should result in just + # not being parallel. + self.claim = None + + self.is_open = True + + def close(self): + """Return all reserved slots to Jobserver""" + + if not self.is_open: + return + + # Return all the reserved slots. + if len(self.jobs): + os.write(self.writer, self.jobs) + + self.is_open = False + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.close() + + def run(self, cmd, *args, **pwargs): + """ + Run a command setting PARALLELISM env variable to the number of + available job slots (claim) + 1, e.g. it will reserve claim slots + to do the actual build work, plus one to monitor its children. + """ + self.open() # Ensure that self.claim is set + + # We can only claim parallelism if there was a jobserver (i.e. a + # top-level "-jN" argument) and there were no other failures. Otherwise + # leave out the environment variable and let the child figure out what + # is best. + if self.claim: + os.environ["PARALLELISM"] = str(self.claim) + + return subprocess.call(cmd, *args, **pwargs) -- cgit v1.2.3 From a84a5d0b5a184551eeded75b8df6440bd81e84f4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:37 +0200 Subject: scripts/jobserver-exec: add a help message Currently, calling it without an argument shows an ugly error message. Instead, print a message using pythondoc as description. Signed-off-by: Mauro Carvalho Chehab Message-ID: <64b0339eac54ac0f2b3de3667a7f4f5becb1c6ae.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/jobserver-exec | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'scripts') diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 40a0f0058733..ae23afd344ec 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -1,6 +1,15 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0+ +""" +Determines how many parallel tasks "make" is expecting, as it is +not exposed via any special variables, reserves them all, runs a subprocess +with PARALLELISM environment variable set, and releases the jobs back again. + +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" + import os import sys @@ -12,17 +21,12 @@ sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) from jobserver import JobserverExec # pylint: disable=C0415 -""" -Determines how many parallel tasks "make" is expecting, as it is -not exposed via an special variables, reserves them all, runs a subprocess -with PARALLELISM environment variable set, and releases the jobs back again. - -See: - https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -""" - def main(): """Main program""" + if len(sys.argv) < 2: + name = os.path.basename(__file__) + sys.exit("usage: " + name +" command [args ...]\n" + __doc__) + with JobserverExec() as jobserver: jobserver.run(sys.argv[1:]) -- cgit v1.2.3 From 75539bec27ddf4ac206b74d307ba9e92dbaaece7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:38 +0200 Subject: scripts: check-variable-fonts.sh: convert to Python This script handle errors when trying to build translations with make pdfdocs. As part of our cleanup work to remove hacks from docs Makefile, convert this to python, preparing it to be part of a library to be called by sphinx-build-wrapper. Signed-off-by: Mauro Carvalho Chehab Message-ID: Signed-off-by: Jonathan Corbet --- Documentation/Makefile | 2 +- MAINTAINERS | 2 +- scripts/check-variable-fonts.py | 165 ++++++++++++++++++++++++++++++++++++++++ scripts/check-variable-fonts.sh | 115 ---------------------------- 4 files changed, 167 insertions(+), 117 deletions(-) create mode 100755 scripts/check-variable-fonts.py delete mode 100755 scripts/check-variable-fonts.sh (limited to 'scripts') diff --git a/Documentation/Makefile b/Documentation/Makefile index 3609cb86137b..8dca118e9520 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -149,7 +149,7 @@ pdfdocs: DENY_VF = XDG_CONFIG_HOME=$(FONTS_CONF_DENY_VF) pdfdocs: latexdocs @$(srctree)/scripts/sphinx-pre-install --version-check $(foreach var,$(SPHINXDIRS), \ - $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" $(DENY_VF) -C $(BUILDDIR)/$(var)/latex || sh $(srctree)/scripts/check-variable-fonts.sh || exit; \ + $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" $(DENY_VF) -C $(BUILDDIR)/$(var)/latex || $(PYTHON3) $(srctree)/scripts/check-variable-fonts.py || exit; \ mkdir -p $(BUILDDIR)/$(var)/pdf; \ mv $(subst .tex,.pdf,$(wildcard $(BUILDDIR)/$(var)/latex/*.tex)) $(BUILDDIR)/$(var)/pdf/; \ ) diff --git a/MAINTAINERS b/MAINTAINERS index ac47a5d0d8e8..b0e9b793519f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7301,7 +7301,7 @@ S: Maintained P: Documentation/doc-guide/maintainer-profile.rst T: git git://git.lwn.net/linux.git docs-next F: Documentation/ -F: scripts/check-variable-fonts.sh +F: scripts/check-variable-fonts.py F: scripts/checktransupdate.py F: scripts/documentation-file-ref-check F: scripts/get_abi.py diff --git a/scripts/check-variable-fonts.py b/scripts/check-variable-fonts.py new file mode 100755 index 000000000000..8be1c0f39588 --- /dev/null +++ b/scripts/check-variable-fonts.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) Akira Yokosawa, 2024 +# +# Ported to Python by (c) Mauro Carvalho Chehab, 2025 +# +# For "make pdfdocs", reports of build errors of translations.pdf started +# arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE +# tumbleweed have started deploying variable-font [3] format of "Noto CJK" +# fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK +# (Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which +# does not (and likely never will) understand variable fonts for historical +# reasons. +# +# The build error happens even when both of variable- and non-variable-format +# fonts are found on the build system. To make matters worse, Fedora enlists +# variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, +# -zh_TW, etc. Hence developers who have interest in CJK pages are more +# likely to encounter the build errors. +# +# This script is invoked from the error path of "make pdfdocs" and emits +# suggestions if variable-font files of "Noto CJK" fonts are in the list of +# fonts accessible from XeTeX. +# +# References: +# [1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ +# [2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ +# [3]: https://en.wikipedia.org/wiki/Variable_font +# [4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts +# [5]: https://build.opensuse.org/request/show/1157217 +# +#=========================================================================== +# Workarounds for building translations.pdf +#=========================================================================== +# +# * Denylist "variable font" Noto CJK fonts. +# - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with +# tweaks if necessary. Remove leading "# ". +# - Path of fontconfig/fonts.conf can be overridden by setting an env +# variable FONTS_CONF_DENY_VF. +# +# * Template: +# ----------------------------------------------------------------- +# +# +# +# +# +# +# +# /usr/share/fonts/google-noto-*-cjk-vf-fonts +# +# /usr/share/fonts/truetype/Noto*CJK*-VF.otf +# +# +# +# ----------------------------------------------------------------- +# +# The denylisting is activated for "make pdfdocs". +# +# * For skipping CJK pages in PDF +# - Uninstall texlive-xecjk. +# Denylisting is not needed in this case. +# +# * For printing CJK pages in PDF +# - Need non-variable "Noto CJK" fonts. +# * Fedora +# - google-noto-sans-cjk-fonts +# - google-noto-serif-cjk-fonts +# * openSUSE tumbleweed +# - Non-variable "Noto CJK" fonts are not available as distro packages +# as of April, 2024. Fetch a set of font files from upstream Noto +# CJK Font released at: +# https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc +# and at: +# https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc +# , then uncompress and deploy them. +# - Remember to update fontconfig cache by running fc-cache. +# +# !!! Caution !!! +# Uninstalling "variable font" packages can be dangerous. +# They might be depended upon by other packages important for your work. +# Denylisting should be less invasive, as it is effective only while +# XeLaTeX runs in "make pdfdocs". + +import os +import re +import subprocess +import sys +import textwrap + +class LatexFontChecker: + """ + Detect problems with CJK variable fonts that affect PDF builds for + translations. + """ + + def __init__(self): + deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") + + self.environ = os.environ.copy() + self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) + + self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") + + def get_noto_cjk_vf_fonts(self): + """Get Noto CJK fonts""" + + cjk_fonts = set() + cmd = ["fc-list", ":", "file", "family", "variable"] + try: + result = subprocess.run(cmd,stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + env=self.environ, + check=True) + + except subprocess.CalledProcessError as exc: + sys.exit(f"Error running fc-list: {repr(exc)}") + + for line in result.stdout.splitlines(): + if 'variable=True' not in line: + continue + + match = self.re_cjk.search(line) + if match: + cjk_fonts.add(match.group(1)) + + return sorted(cjk_fonts) + + def check(self): + """Check for problems with CJK fonts""" + + fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") + if not fonts: + return None + + rel_file = os.path.relpath(__file__, os.getcwd()) + + msg = "=" * 77 + "\n" + msg += 'XeTeX is confused by "variable font" files listed below:\n' + msg += fonts + "\n" + msg += textwrap.dedent(f""" + For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. + Or, CJK pages can be skipped by uninstalling texlive-xecjk. + + For more info on denylisting, other options, and variable font, see header + comments of {rel_file}. + """) + msg += "=" * 77 + + return msg + +if __name__ == "__main__": + msg = LatexFontChecker().check() + if msg: + print(msg) + + sys.exit(1) diff --git a/scripts/check-variable-fonts.sh b/scripts/check-variable-fonts.sh deleted file mode 100755 index ce63f0acea5f..000000000000 --- a/scripts/check-variable-fonts.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0-only -# Copyright (C) Akira Yokosawa, 2024 -# -# For "make pdfdocs", reports of build errors of translations.pdf started -# arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE -# tumbleweed have started deploying variable-font [3] format of "Noto CJK" -# fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK -# (Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which -# does not (and likely never will) understand variable fonts for historical -# reasons. -# -# The build error happens even when both of variable- and non-variable-format -# fonts are found on the build system. To make matters worse, Fedora enlists -# variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, -# -zh_TW, etc. Hence developers who have interest in CJK pages are more -# likely to encounter the build errors. -# -# This script is invoked from the error path of "make pdfdocs" and emits -# suggestions if variable-font files of "Noto CJK" fonts are in the list of -# fonts accessible from XeTeX. -# -# References: -# [1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ -# [2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ -# [3]: https://en.wikipedia.org/wiki/Variable_font -# [4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts -# [5]: https://build.opensuse.org/request/show/1157217 -# -#=========================================================================== -# Workarounds for building translations.pdf -#=========================================================================== -# -# * Denylist "variable font" Noto CJK fonts. -# - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with -# tweaks if necessary. Remove leading "# ". -# - Path of fontconfig/fonts.conf can be overridden by setting an env -# variable FONTS_CONF_DENY_VF. -# -# * Template: -# ----------------------------------------------------------------- -# -# -# -# -# -# -# -# /usr/share/fonts/google-noto-*-cjk-vf-fonts -# -# /usr/share/fonts/truetype/Noto*CJK*-VF.otf -# -# -# -# ----------------------------------------------------------------- -# -# The denylisting is activated for "make pdfdocs". -# -# * For skipping CJK pages in PDF -# - Uninstall texlive-xecjk. -# Denylisting is not needed in this case. -# -# * For printing CJK pages in PDF -# - Need non-variable "Noto CJK" fonts. -# * Fedora -# - google-noto-sans-cjk-fonts -# - google-noto-serif-cjk-fonts -# * openSUSE tumbleweed -# - Non-variable "Noto CJK" fonts are not available as distro packages -# as of April, 2024. Fetch a set of font files from upstream Noto -# CJK Font released at: -# https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc -# and at: -# https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc -# , then uncompress and deploy them. -# - Remember to update fontconfig cache by running fc-cache. -# -# !!! Caution !!! -# Uninstalling "variable font" packages can be dangerous. -# They might be depended upon by other packages important for your work. -# Denylisting should be less invasive, as it is effective only while -# XeLaTeX runs in "make pdfdocs". - -# Default per-user fontconfig path (overridden by env variable) -: ${FONTS_CONF_DENY_VF:=$HOME/deny-vf} - -export XDG_CONFIG_HOME=${FONTS_CONF_DENY_VF} - -notocjkvffonts=`fc-list : file family variable | \ - grep 'variable=True' | \ - grep -E -e 'Noto (Sans|Sans Mono|Serif) CJK' | \ - sed -e 's/^/ /' -e 's/: Noto S.*$//' | sort | uniq` - -if [ "x$notocjkvffonts" != "x" ] ; then - echo '=============================================================================' - echo 'XeTeX is confused by "variable font" files listed below:' - echo "$notocjkvffonts" - echo - echo 'For CJK pages in PDF, they need to be hidden from XeTeX by denylisting.' - echo 'Or, CJK pages can be skipped by uninstalling texlive-xecjk.' - echo - echo 'For more info on denylisting, other options, and variable font, see header' - echo 'comments of scripts/check-variable-fonts.sh.' - echo '=============================================================================' -fi - -# As this script is invoked from Makefile's error path, always error exit -# regardless of whether any variable font is discovered or not. -exit 1 -- cgit v1.2.3 From 4515ffdf3cbc384cb7bbb699bcd1db5705862cfa Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:39 +0200 Subject: tools/docs: check-variable-fonts.py: split into a lib and an exec file As we'll be using the actual code inside sphinx-build-wrapper, split the library from the executable, placing the exec at the new place we've been using: tools/docs No functional changes. Signed-off-by: Mauro Carvalho Chehab Message-ID: <8adbc22df1d43b1c5a673799d2333cc429ffe9fc.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- Documentation/Makefile | 2 +- MAINTAINERS | 1 - scripts/check-variable-fonts.py | 165 ------------------------------------- tools/docs/check-variable-fonts.py | 23 ++++++ tools/docs/lib/latex_fonts.py | 162 ++++++++++++++++++++++++++++++++++++ 5 files changed, 186 insertions(+), 167 deletions(-) delete mode 100755 scripts/check-variable-fonts.py create mode 100755 tools/docs/check-variable-fonts.py create mode 100755 tools/docs/lib/latex_fonts.py (limited to 'scripts') diff --git a/Documentation/Makefile b/Documentation/Makefile index 8dca118e9520..0b71f82f73f6 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -149,7 +149,7 @@ pdfdocs: DENY_VF = XDG_CONFIG_HOME=$(FONTS_CONF_DENY_VF) pdfdocs: latexdocs @$(srctree)/scripts/sphinx-pre-install --version-check $(foreach var,$(SPHINXDIRS), \ - $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" $(DENY_VF) -C $(BUILDDIR)/$(var)/latex || $(PYTHON3) $(srctree)/scripts/check-variable-fonts.py || exit; \ + $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" $(DENY_VF) -C $(BUILDDIR)/$(var)/latex || $(PYTHON3) $(srctree)/tools/docs/check-variable-fonts.py || exit; \ mkdir -p $(BUILDDIR)/$(var)/pdf; \ mv $(subst .tex,.pdf,$(wildcard $(BUILDDIR)/$(var)/latex/*.tex)) $(BUILDDIR)/$(var)/pdf/; \ ) diff --git a/MAINTAINERS b/MAINTAINERS index b0e9b793519f..473152720b17 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7301,7 +7301,6 @@ S: Maintained P: Documentation/doc-guide/maintainer-profile.rst T: git git://git.lwn.net/linux.git docs-next F: Documentation/ -F: scripts/check-variable-fonts.py F: scripts/checktransupdate.py F: scripts/documentation-file-ref-check F: scripts/get_abi.py diff --git a/scripts/check-variable-fonts.py b/scripts/check-variable-fonts.py deleted file mode 100755 index 8be1c0f39588..000000000000 --- a/scripts/check-variable-fonts.py +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-only -# Copyright (C) Akira Yokosawa, 2024 -# -# Ported to Python by (c) Mauro Carvalho Chehab, 2025 -# -# For "make pdfdocs", reports of build errors of translations.pdf started -# arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE -# tumbleweed have started deploying variable-font [3] format of "Noto CJK" -# fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK -# (Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which -# does not (and likely never will) understand variable fonts for historical -# reasons. -# -# The build error happens even when both of variable- and non-variable-format -# fonts are found on the build system. To make matters worse, Fedora enlists -# variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, -# -zh_TW, etc. Hence developers who have interest in CJK pages are more -# likely to encounter the build errors. -# -# This script is invoked from the error path of "make pdfdocs" and emits -# suggestions if variable-font files of "Noto CJK" fonts are in the list of -# fonts accessible from XeTeX. -# -# References: -# [1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ -# [2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ -# [3]: https://en.wikipedia.org/wiki/Variable_font -# [4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts -# [5]: https://build.opensuse.org/request/show/1157217 -# -#=========================================================================== -# Workarounds for building translations.pdf -#=========================================================================== -# -# * Denylist "variable font" Noto CJK fonts. -# - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with -# tweaks if necessary. Remove leading "# ". -# - Path of fontconfig/fonts.conf can be overridden by setting an env -# variable FONTS_CONF_DENY_VF. -# -# * Template: -# ----------------------------------------------------------------- -# -# -# -# -# -# -# -# /usr/share/fonts/google-noto-*-cjk-vf-fonts -# -# /usr/share/fonts/truetype/Noto*CJK*-VF.otf -# -# -# -# ----------------------------------------------------------------- -# -# The denylisting is activated for "make pdfdocs". -# -# * For skipping CJK pages in PDF -# - Uninstall texlive-xecjk. -# Denylisting is not needed in this case. -# -# * For printing CJK pages in PDF -# - Need non-variable "Noto CJK" fonts. -# * Fedora -# - google-noto-sans-cjk-fonts -# - google-noto-serif-cjk-fonts -# * openSUSE tumbleweed -# - Non-variable "Noto CJK" fonts are not available as distro packages -# as of April, 2024. Fetch a set of font files from upstream Noto -# CJK Font released at: -# https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc -# and at: -# https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc -# , then uncompress and deploy them. -# - Remember to update fontconfig cache by running fc-cache. -# -# !!! Caution !!! -# Uninstalling "variable font" packages can be dangerous. -# They might be depended upon by other packages important for your work. -# Denylisting should be less invasive, as it is effective only while -# XeLaTeX runs in "make pdfdocs". - -import os -import re -import subprocess -import sys -import textwrap - -class LatexFontChecker: - """ - Detect problems with CJK variable fonts that affect PDF builds for - translations. - """ - - def __init__(self): - deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") - - self.environ = os.environ.copy() - self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) - - self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") - - def get_noto_cjk_vf_fonts(self): - """Get Noto CJK fonts""" - - cjk_fonts = set() - cmd = ["fc-list", ":", "file", "family", "variable"] - try: - result = subprocess.run(cmd,stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - env=self.environ, - check=True) - - except subprocess.CalledProcessError as exc: - sys.exit(f"Error running fc-list: {repr(exc)}") - - for line in result.stdout.splitlines(): - if 'variable=True' not in line: - continue - - match = self.re_cjk.search(line) - if match: - cjk_fonts.add(match.group(1)) - - return sorted(cjk_fonts) - - def check(self): - """Check for problems with CJK fonts""" - - fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") - if not fonts: - return None - - rel_file = os.path.relpath(__file__, os.getcwd()) - - msg = "=" * 77 + "\n" - msg += 'XeTeX is confused by "variable font" files listed below:\n' - msg += fonts + "\n" - msg += textwrap.dedent(f""" - For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. - Or, CJK pages can be skipped by uninstalling texlive-xecjk. - - For more info on denylisting, other options, and variable font, see header - comments of {rel_file}. - """) - msg += "=" * 77 - - return msg - -if __name__ == "__main__": - msg = LatexFontChecker().check() - if msg: - print(msg) - - sys.exit(1) diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py new file mode 100755 index 000000000000..79b28f0f7d85 --- /dev/null +++ b/tools/docs/check-variable-fonts.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) Akira Yokosawa, 2024 +# +# Ported to Python by (c) Mauro Carvalho Chehab, 2025 +# +# pylint: disable=C0103 + +""" +Detect problematic Noto CJK variable fonts. + +or more details, see lib/latex_fonts.py. +""" + +import sys + +from lib.latex_fonts import LatexFontChecker + +msg = LatexFontChecker().check() +if msg: + print(msg) + +sys.exit(1) diff --git a/tools/docs/lib/latex_fonts.py b/tools/docs/lib/latex_fonts.py new file mode 100755 index 000000000000..81358a70f320 --- /dev/null +++ b/tools/docs/lib/latex_fonts.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) Akira Yokosawa, 2024 +# +# Ported to Python by (c) Mauro Carvalho Chehab, 2025 + +""" +Detect problematic Noto CJK variable fonts. + +For "make pdfdocs", reports of build errors of translations.pdf started +arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE +tumbleweed have started deploying variable-font [3] format of "Noto CJK" +fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK +(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which +does not (and likely never will) understand variable fonts for historical +reasons. + +The build error happens even when both of variable- and non-variable-format +fonts are found on the build system. To make matters worse, Fedora enlists +variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, +-zh_TW, etc. Hence developers who have interest in CJK pages are more +likely to encounter the build errors. + +This script is invoked from the error path of "make pdfdocs" and emits +suggestions if variable-font files of "Noto CJK" fonts are in the list of +fonts accessible from XeTeX. + +References: +[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ +[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ +[3]: https://en.wikipedia.org/wiki/Variable_font +[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts +[5]: https://build.opensuse.org/request/show/1157217 + +#=========================================================================== +Workarounds for building translations.pdf +#=========================================================================== + +* Denylist "variable font" Noto CJK fonts. + - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with + tweaks if necessary. Remove leading "". + - Path of fontconfig/fonts.conf can be overridden by setting an env + variable FONTS_CONF_DENY_VF. + + * Template: +----------------------------------------------------------------- + + + + + + + + /usr/share/fonts/google-noto-*-cjk-vf-fonts + + /usr/share/fonts/truetype/Noto*CJK*-VF.otf + + + +----------------------------------------------------------------- + + The denylisting is activated for "make pdfdocs". + +* For skipping CJK pages in PDF + - Uninstall texlive-xecjk. + Denylisting is not needed in this case. + +* For printing CJK pages in PDF + - Need non-variable "Noto CJK" fonts. + * Fedora + - google-noto-sans-cjk-fonts + - google-noto-serif-cjk-fonts + * openSUSE tumbleweed + - Non-variable "Noto CJK" fonts are not available as distro packages + as of April, 2024. Fetch a set of font files from upstream Noto + CJK Font released at: + https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc + and at: + https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc + , then uncompress and deploy them. + - Remember to update fontconfig cache by running fc-cache. + +!!! Caution !!! + Uninstalling "variable font" packages can be dangerous. + They might be depended upon by other packages important for your work. + Denylisting should be less invasive, as it is effective only while + XeLaTeX runs in "make pdfdocs". +""" + +import os +import re +import subprocess +import textwrap +import sys + +class LatexFontChecker: + """ + Detect problems with CJK variable fonts that affect PDF builds for + translations. + """ + + def __init__(self): + deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") + + self.environ = os.environ.copy() + self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) + + self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") + + def get_noto_cjk_vf_fonts(self): + """Get Noto CJK fonts""" + + cjk_fonts = set() + cmd = ["fc-list", ":", "file", "family", "variable"] + try: + result = subprocess.run(cmd,stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + env=self.environ, + check=True) + + except subprocess.CalledProcessError as exc: + sys.exit(f"Error running fc-list: {repr(exc)}") + + for line in result.stdout.splitlines(): + if 'variable=True' not in line: + continue + + match = self.re_cjk.search(line) + if match: + cjk_fonts.add(match.group(1)) + + return sorted(cjk_fonts) + + def check(self): + """Check for problems with CJK fonts""" + + fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") + if not fonts: + return None + + rel_file = os.path.relpath(__file__, os.getcwd()) + + msg = "=" * 77 + "\n" + msg += 'XeTeX is confused by "variable font" files listed below:\n' + msg += fonts + "\n" + msg += textwrap.dedent(f""" + For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. + Or, CJK pages can be skipped by uninstalling texlive-xecjk. + + For more info on denylisting, other options, and variable font, see header + comments of {rel_file}. + """) + msg += "=" * 77 + + return msg -- cgit v1.2.3 From abd61d1ff8f0ea4cb099a1f3d5015dea7c8471cf Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:41 +0200 Subject: scripts: sphinx-pre-install: move it to tools/docs As we're reorganizing the place where doc scripts are located, move this one to tools/docs. No functional changes. Signed-off-by: Mauro Carvalho Chehab Message-ID: <5e2c40d3aebfd67b7ac7817f548bd1fa4ff661a8.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- Documentation/Makefile | 14 +- Documentation/doc-guide/sphinx.rst | 4 +- Documentation/sphinx/kerneldoc-preamble.sty | 2 +- .../translations/it_IT/doc-guide/sphinx.rst | 4 +- .../translations/zh_CN/doc-guide/sphinx.rst | 4 +- Documentation/translations/zh_CN/how-to.rst | 2 +- MAINTAINERS | 3 +- scripts/sphinx-pre-install | 1621 -------------------- tools/docs/sphinx-pre-install | 1621 ++++++++++++++++++++ 9 files changed, 1637 insertions(+), 1638 deletions(-) delete mode 100755 scripts/sphinx-pre-install create mode 100755 tools/docs/sphinx-pre-install (limited to 'scripts') diff --git a/Documentation/Makefile b/Documentation/Makefile index 0b71f82f73f6..0e8698fa52ef 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -46,7 +46,7 @@ ifeq ($(HAVE_SPHINX),0) .DEFAULT: $(warning The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed and in PATH, or set the SPHINXBUILD make variable to point to the full path of the '$(SPHINXBUILD)' executable.) @echo - @$(srctree)/scripts/sphinx-pre-install + @$(srctree)/tools/docs/sphinx-pre-install @echo " SKIP Sphinx $@ target." else # HAVE_SPHINX @@ -105,7 +105,7 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4) fi htmldocs: - @$(srctree)/scripts/sphinx-pre-install --version-check + @$(srctree)/tools/docs/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var))) htmldocs-redirects: $(srctree)/Documentation/.renames.txt @@ -122,7 +122,7 @@ endif endif texinfodocs: - @$(srctree)/scripts/sphinx-pre-install --version-check + @$(srctree)/tools/docs/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,texinfo,$(var),texinfo,$(var))) # Note: the 'info' Make target is generated by sphinx itself when @@ -134,7 +134,7 @@ linkcheckdocs: @$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var))) latexdocs: - @$(srctree)/scripts/sphinx-pre-install --version-check + @$(srctree)/tools/docs/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var))) ifeq ($(HAVE_PDFLATEX),0) @@ -147,7 +147,7 @@ else # HAVE_PDFLATEX pdfdocs: DENY_VF = XDG_CONFIG_HOME=$(FONTS_CONF_DENY_VF) pdfdocs: latexdocs - @$(srctree)/scripts/sphinx-pre-install --version-check + @$(srctree)/tools/docs/sphinx-pre-install --version-check $(foreach var,$(SPHINXDIRS), \ $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" $(DENY_VF) -C $(BUILDDIR)/$(var)/latex || $(PYTHON3) $(srctree)/tools/docs/check-variable-fonts.py || exit; \ mkdir -p $(BUILDDIR)/$(var)/pdf; \ @@ -157,11 +157,11 @@ pdfdocs: latexdocs endif # HAVE_PDFLATEX epubdocs: - @$(srctree)/scripts/sphinx-pre-install --version-check + @$(srctree)/tools/docs/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var))) xmldocs: - @$(srctree)/scripts/sphinx-pre-install --version-check + @$(srctree)/tools/docs/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var))) endif # HAVE_SPHINX diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index 607589592bfb..932f68c53075 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -106,7 +106,7 @@ There's a script that automatically checks for Sphinx dependencies. If it can recognize your distribution, it will also give a hint about the install command line options for your distro:: - $ ./scripts/sphinx-pre-install + $ ./tools/docs/sphinx-pre-install Checking if the needed tools for Fedora release 26 (Twenty Six) are available Warning: better to also install "texlive-luatex85". You should run: @@ -116,7 +116,7 @@ command line options for your distro:: . sphinx_2.4.4/bin/activate pip install -r Documentation/sphinx/requirements.txt - Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468. + Can't build as 1 mandatory dependency is missing at ./tools/docs/sphinx-pre-install line 468. By default, it checks all the requirements for both html and PDF, including the requirements for images, math expressions and LaTeX build, and assumes diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty index 5d68395539fe..16d9ff46fdf6 100644 --- a/Documentation/sphinx/kerneldoc-preamble.sty +++ b/Documentation/sphinx/kerneldoc-preamble.sty @@ -220,7 +220,7 @@ If you want them, please install non-variable ``Noto Sans CJK'' font families along with the texlive-xecjk package by following instructions from - \sphinxcode{./scripts/sphinx-pre-install}. + \sphinxcode{./tools/docs/sphinx-pre-install}. Having optional non-variable ``Noto Serif CJK'' font families will improve the looks of those translations. \end{sphinxadmonition}} diff --git a/Documentation/translations/it_IT/doc-guide/sphinx.rst b/Documentation/translations/it_IT/doc-guide/sphinx.rst index 1f513bc33618..a5c5d935febf 100644 --- a/Documentation/translations/it_IT/doc-guide/sphinx.rst +++ b/Documentation/translations/it_IT/doc-guide/sphinx.rst @@ -109,7 +109,7 @@ Sphinx. Se lo script riesce a riconoscere la vostra distribuzione, allora sarà in grado di darvi dei suggerimenti su come procedere per completare l'installazione:: - $ ./scripts/sphinx-pre-install + $ ./tools/docs/sphinx-pre-install Checking if the needed tools for Fedora release 26 (Twenty Six) are available Warning: better to also install "texlive-luatex85". You should run: @@ -119,7 +119,7 @@ l'installazione:: . sphinx_2.4.4/bin/activate pip install -r Documentation/sphinx/requirements.txt - Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468. + Can't build as 1 mandatory dependency is missing at ./tools/docs/sphinx-pre-install line 468. L'impostazione predefinita prevede il controllo dei requisiti per la generazione di documenti html e PDF, includendo anche il supporto per le immagini, le diff --git a/Documentation/translations/zh_CN/doc-guide/sphinx.rst b/Documentation/translations/zh_CN/doc-guide/sphinx.rst index 23eac67fbc30..3375c6f3a811 100644 --- a/Documentation/translations/zh_CN/doc-guide/sphinx.rst +++ b/Documentation/translations/zh_CN/doc-guide/sphinx.rst @@ -84,7 +84,7 @@ PDF和LaTeX构建 这有一个脚本可以自动检查Sphinx依赖项。如果它认得您的发行版,还会提示您所用发行 版的安装命令:: - $ ./scripts/sphinx-pre-install + $ ./tools/docs/sphinx-pre-install Checking if the needed tools for Fedora release 26 (Twenty Six) are available Warning: better to also install "texlive-luatex85". You should run: @@ -94,7 +94,7 @@ PDF和LaTeX构建 . sphinx_2.4.4/bin/activate pip install -r Documentation/sphinx/requirements.txt - Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468. + Can't build as 1 mandatory dependency is missing at ./tools/docs/sphinx-pre-install line 468. 默认情况下,它会检查html和PDF的所有依赖项,包括图像、数学表达式和LaTeX构建的 需求,并假设将使用虚拟Python环境。html构建所需的依赖项被认为是必需的,其他依 diff --git a/Documentation/translations/zh_CN/how-to.rst b/Documentation/translations/zh_CN/how-to.rst index ddd99c0f9b4d..714664fec308 100644 --- a/Documentation/translations/zh_CN/how-to.rst +++ b/Documentation/translations/zh_CN/how-to.rst @@ -64,7 +64,7 @@ Linux 发行版和简单地使用 Linux 命令行,那么可以迅速开始了 :: cd linux - ./scripts/sphinx-pre-install + ./tools/docs/sphinx-pre-install 以 Fedora 为例,它的输出是这样的:: diff --git a/MAINTAINERS b/MAINTAINERS index 473152720b17..30d39a5befdb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7309,7 +7309,6 @@ F: scripts/lib/abi/* F: scripts/lib/kdoc/* F: tools/docs/* F: tools/net/ynl/pyynl/lib/doc_generator.py -F: scripts/sphinx-pre-install X: Documentation/ABI/ X: Documentation/admin-guide/media/ X: Documentation/devicetree/ @@ -7344,7 +7343,7 @@ L: linux-doc@vger.kernel.org S: Maintained F: Documentation/sphinx/parse-headers.pl F: scripts/documentation-file-ref-check -F: scripts/sphinx-pre-install +F: tools/docs/sphinx-pre-install DOCUMENTATION/ITALIAN M: Federico Vaga diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install deleted file mode 100755 index 954ed3dc0645..000000000000 --- a/scripts/sphinx-pre-install +++ /dev/null @@ -1,1621 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-or-later -# Copyright (c) 2017-2025 Mauro Carvalho Chehab -# -# pylint: disable=C0103,C0114,C0115,C0116,C0301,C0302 -# pylint: disable=R0902,R0904,R0911,R0912,R0914,R0915,R1705,R1710,E1121 - -# Note: this script requires at least Python 3.6 to run. -# Don't add changes not compatible with it, it is meant to report -# incompatible python versions. - -""" -Dependency checker for Sphinx documentation Kernel build. - -This module provides tools to check for all required dependencies needed to -build documentation using Sphinx, including system packages, Python modules -and LaTeX packages for PDF generation. - -It detect packages for a subset of Linux distributions used by Kernel -maintainers, showing hints and missing dependencies. - -The main class SphinxDependencyChecker handles the dependency checking logic -and provides recommendations for installing missing packages. It supports both -system package installations and Python virtual environments. By default, -system pacage install is recommended. -""" - -import argparse -import os -import re -import subprocess -import sys -from glob import glob - - -def parse_version(version): - """Convert a major.minor.patch version into a tuple""" - return tuple(int(x) for x in version.split(".")) - - -def ver_str(version): - """Returns a version tuple as major.minor.patch""" - - return ".".join([str(x) for x in version]) - - -RECOMMENDED_VERSION = parse_version("3.4.3") -MIN_PYTHON_VERSION = parse_version("3.7") - - -class DepManager: - """ - Manage package dependencies. There are three types of dependencies: - - - System: dependencies required for docs build; - - Python: python dependencies for a native distro Sphinx install; - - PDF: dependencies needed by PDF builds. - - Each dependency can be mandatory or optional. Not installing an optional - dependency won't break the build, but will cause degradation at the - docs output. - """ - - # Internal types of dependencies. Don't use them outside DepManager class. - _SYS_TYPE = 0 - _PHY_TYPE = 1 - _PDF_TYPE = 2 - - # Dependencies visible outside the class. - # The keys are tuple with: (type, is_mandatory flag). - # - # Currently we're not using all optional dep types. Yet, we'll keep all - # possible combinations here. They're not many, and that makes easier - # if later needed and for the name() method below - - SYSTEM_MANDATORY = (_SYS_TYPE, True) - PYTHON_MANDATORY = (_PHY_TYPE, True) - PDF_MANDATORY = (_PDF_TYPE, True) - - SYSTEM_OPTIONAL = (_SYS_TYPE, False) - PYTHON_OPTIONAL = (_PHY_TYPE, False) - PDF_OPTIONAL = (_PDF_TYPE, True) - - def __init__(self, pdf): - """ - Initialize internal vars: - - - missing: missing dependencies list, containing a distro-independent - name for a missing dependency and its type. - - missing_pkg: ancillary dict containing missing dependencies in - distro namespace, organized by type. - - need: total number of needed dependencies. Never cleaned. - - optional: total number of optional dependencies. Never cleaned. - - pdf: Is PDF support enabled? - """ - self.missing = {} - self.missing_pkg = {} - self.need = 0 - self.optional = 0 - self.pdf = pdf - - @staticmethod - def name(dtype): - """ - Ancillary routine to output a warn/error message reporting - missing dependencies. - """ - if dtype[0] == DepManager._SYS_TYPE: - msg = "build" - elif dtype[0] == DepManager._PHY_TYPE: - msg = "Python" - else: - msg = "PDF" - - if dtype[1]: - return f"ERROR: {msg} mandatory deps missing" - else: - return f"Warning: {msg} optional deps missing" - - @staticmethod - def is_optional(dtype): - """Ancillary routine to report if a dependency is optional""" - return not dtype[1] - - @staticmethod - def is_pdf(dtype): - """Ancillary routine to report if a dependency is for PDF generation""" - if dtype[0] == DepManager._PDF_TYPE: - return True - - return False - - def add_package(self, package, dtype): - """ - Add a package at the self.missing() dictionary. - Doesn't update missing_pkg. - """ - is_optional = DepManager.is_optional(dtype) - self.missing[package] = dtype - if is_optional: - self.optional += 1 - else: - self.need += 1 - - def del_package(self, package): - """ - Remove a package at the self.missing() dictionary. - Doesn't update missing_pkg. - """ - if package in self.missing: - del self.missing[package] - - def clear_deps(self): - """ - Clear dependencies without changing needed/optional. - - This is an ackward way to have a separate section to recommend - a package after system main dependencies. - - TODO: rework the logic to prevent needing it. - """ - - self.missing = {} - self.missing_pkg = {} - - def check_missing(self, progs): - """ - Update self.missing_pkg, using progs dict to convert from the - agnostic package name to distro-specific one. - - Returns an string with the packages to be installed, sorted and - with eventual duplicates removed. - """ - - self.missing_pkg = {} - - for prog, dtype in sorted(self.missing.items()): - # At least on some LTS distros like CentOS 7, texlive doesn't - # provide all packages we need. When such distros are - # detected, we have to disable PDF output. - # - # So, we need to ignore the packages that distros would - # need for LaTeX to work - if DepManager.is_pdf(dtype) and not self.pdf: - self.optional -= 1 - continue - - if not dtype in self.missing_pkg: - self.missing_pkg[dtype] = [] - - self.missing_pkg[dtype].append(progs.get(prog, prog)) - - install = [] - for dtype, pkgs in self.missing_pkg.items(): - install += pkgs - - return " ".join(sorted(set(install))) - - def warn_install(self): - """ - Emit warnings/errors related to missing packages. - """ - - output_msg = "" - - for dtype in sorted(self.missing_pkg.keys()): - progs = " ".join(sorted(set(self.missing_pkg[dtype]))) - - try: - name = DepManager.name(dtype) - output_msg += f'{name}:\t{progs}\n' - except KeyError: - raise KeyError(f"ERROR!!!: invalid dtype for {progs}: {dtype}") - - if output_msg: - print(f"\n{output_msg}") - -class AncillaryMethods: - """ - Ancillary methods that checks for missing dependencies for different - types of types, like binaries, python modules, rpm deps, etc. - """ - - @staticmethod - def which(prog): - """ - Our own implementation of which(). We could instead use - shutil.which(), but this function is simple enough. - Probably faster to use this implementation than to import shutil. - """ - for path in os.environ.get("PATH", "").split(":"): - full_path = os.path.join(path, prog) - if os.access(full_path, os.X_OK): - return full_path - - return None - - @staticmethod - def get_python_version(cmd): - """ - Get python version from a Python binary. As we need to detect if - are out there newer python binaries, we can't rely on sys.release here. - """ - - result = SphinxDependencyChecker.run([cmd, "--version"], - capture_output=True, text=True) - version = result.stdout.strip() - - match = re.search(r"(\d+\.\d+\.\d+)", version) - if match: - return parse_version(match.group(1)) - - print(f"Can't parse version {version}") - return (0, 0, 0) - - @staticmethod - def find_python(): - """ - Detect if are out there any python 3.xy version newer than the - current one. - - Note: this routine is limited to up to 2 digits for python3. We - may need to update it one day, hopefully on a distant future. - """ - patterns = [ - "python3.[0-9]", - "python3.[0-9][0-9]", - ] - - # Seek for a python binary newer than MIN_PYTHON_VERSION - for path in os.getenv("PATH", "").split(":"): - for pattern in patterns: - for cmd in glob(os.path.join(path, pattern)): - if os.path.isfile(cmd) and os.access(cmd, os.X_OK): - version = SphinxDependencyChecker.get_python_version(cmd) - if version >= MIN_PYTHON_VERSION: - return cmd - - @staticmethod - def check_python(): - """ - Check if the current python binary satisfies our minimal requirement - for Sphinx build. If not, re-run with a newer version if found. - """ - cur_ver = sys.version_info[:3] - if cur_ver >= MIN_PYTHON_VERSION: - ver = ver_str(cur_ver) - print(f"Python version: {ver}") - - # This could be useful for debugging purposes - if SphinxDependencyChecker.which("docutils"): - result = SphinxDependencyChecker.run(["docutils", "--version"], - capture_output=True, text=True) - ver = result.stdout.strip() - match = re.search(r"(\d+\.\d+\.\d+)", ver) - if match: - ver = match.group(1) - - print(f"Docutils version: {ver}") - - return - - python_ver = ver_str(cur_ver) - - new_python_cmd = SphinxDependencyChecker.find_python() - if not new_python_cmd: - print(f"ERROR: Python version {python_ver} is not spported anymore\n") - print(" Can't find a new version. This script may fail") - return - - # Restart script using the newer version - script_path = os.path.abspath(sys.argv[0]) - args = [new_python_cmd, script_path] + sys.argv[1:] - - print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") - - try: - os.execv(new_python_cmd, args) - except OSError as e: - sys.exit(f"Failed to restart with {new_python_cmd}: {e}") - - @staticmethod - def run(*args, **kwargs): - """ - Excecute a command, hiding its output by default. - Preserve comatibility with older Python versions. - """ - - capture_output = kwargs.pop('capture_output', False) - - if capture_output: - if 'stdout' not in kwargs: - kwargs['stdout'] = subprocess.PIPE - if 'stderr' not in kwargs: - kwargs['stderr'] = subprocess.PIPE - else: - if 'stdout' not in kwargs: - kwargs['stdout'] = subprocess.DEVNULL - if 'stderr' not in kwargs: - kwargs['stderr'] = subprocess.DEVNULL - - # Don't break with older Python versions - if 'text' in kwargs and sys.version_info < (3, 7): - kwargs['universal_newlines'] = kwargs.pop('text') - - return subprocess.run(*args, **kwargs) - -class MissingCheckers(AncillaryMethods): - """ - Contains some ancillary checkers for different types of binaries and - package managers. - """ - - def __init__(self, args, texlive): - """ - Initialize its internal variables - """ - self.pdf = args.pdf - self.virtualenv = args.virtualenv - self.version_check = args.version_check - self.texlive = texlive - - self.min_version = (0, 0, 0) - self.cur_version = (0, 0, 0) - - self.deps = DepManager(self.pdf) - - self.need_symlink = 0 - self.need_sphinx = 0 - - self.verbose_warn_install = 1 - - self.virtenv_dir = "" - self.install = "" - self.python_cmd = "" - - self.virtenv_prefix = ["sphinx_", "Sphinx_" ] - - def check_missing_file(self, files, package, dtype): - """ - Does the file exists? If not, add it to missing dependencies. - """ - for f in files: - if os.path.exists(f): - return - self.deps.add_package(package, dtype) - - def check_program(self, prog, dtype): - """ - Does the program exists and it is at the PATH? - If not, add it to missing dependencies. - """ - found = self.which(prog) - if found: - return found - - self.deps.add_package(prog, dtype) - - return None - - def check_perl_module(self, prog, dtype): - """ - Does perl have a dependency? Is it available? - If not, add it to missing dependencies. - - Right now, we still need Perl for doc build, as it is required - by some tools called at docs or kernel build time, like: - - scripts/documentation-file-ref-check - - Also, checkpatch is on Perl. - """ - - # While testing with lxc download template, one of the - # distros (Oracle) didn't have perl - nor even an option to install - # before installing oraclelinux-release-el9 package. - # - # Check it before running an error. If perl is not there, - # add it as a mandatory package, as some parts of the doc builder - # needs it. - if not self.which("perl"): - self.deps.add_package("perl", DepManager.SYSTEM_MANDATORY) - self.deps.add_package(prog, dtype) - return - - try: - self.run(["perl", f"-M{prog}", "-e", "1"], check=True) - except subprocess.CalledProcessError: - self.deps.add_package(prog, dtype) - - def check_python_module(self, module, is_optional=False): - """ - Does a python module exists outside venv? If not, add it to missing - dependencies. - """ - if is_optional: - dtype = DepManager.PYTHON_OPTIONAL - else: - dtype = DepManager.PYTHON_MANDATORY - - try: - self.run([self.python_cmd, "-c", f"import {module}"], check=True) - except subprocess.CalledProcessError: - self.deps.add_package(module, dtype) - - def check_rpm_missing(self, pkgs, dtype): - """ - Does a rpm package exists? If not, add it to missing dependencies. - """ - for prog in pkgs: - try: - self.run(["rpm", "-q", prog], check=True) - except subprocess.CalledProcessError: - self.deps.add_package(prog, dtype) - - def check_pacman_missing(self, pkgs, dtype): - """ - Does a pacman package exists? If not, add it to missing dependencies. - """ - for prog in pkgs: - try: - self.run(["pacman", "-Q", prog], check=True) - except subprocess.CalledProcessError: - self.deps.add_package(prog, dtype) - - def check_missing_tex(self, is_optional=False): - """ - Does a LaTeX package exists? If not, add it to missing dependencies. - """ - if is_optional: - dtype = DepManager.PDF_OPTIONAL - else: - dtype = DepManager.PDF_MANDATORY - - kpsewhich = self.which("kpsewhich") - for prog, package in self.texlive.items(): - - # If kpsewhich is not there, just add it to deps - if not kpsewhich: - self.deps.add_package(package, dtype) - continue - - # Check if the package is needed - try: - result = self.run( - [kpsewhich, prog], stdout=subprocess.PIPE, text=True, check=True - ) - - # Didn't find. Add it - if not result.stdout.strip(): - self.deps.add_package(package, dtype) - - except subprocess.CalledProcessError: - # kpsewhich returned an error. Add it, just in case - self.deps.add_package(package, dtype) - - def get_sphinx_fname(self): - """ - Gets the binary filename for sphinx-build. - """ - if "SPHINXBUILD" in os.environ: - return os.environ["SPHINXBUILD"] - - fname = "sphinx-build" - if self.which(fname): - return fname - - fname = "sphinx-build-3" - if self.which(fname): - self.need_symlink = 1 - return fname - - return "" - - def get_sphinx_version(self, cmd): - """ - Gets sphinx-build version. - """ - try: - result = self.run([cmd, "--version"], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, check=True) - except (subprocess.CalledProcessError, FileNotFoundError): - return None - - for line in result.stdout.split("\n"): - match = re.match(r"^sphinx-build\s+([\d\.]+)(?:\+(?:/[\da-f]+)|b\d+)?\s*$", line) - if match: - return parse_version(match.group(1)) - - match = re.match(r"^Sphinx.*\s+([\d\.]+)\s*$", line) - if match: - return parse_version(match.group(1)) - - def check_sphinx(self, conf): - """ - Checks Sphinx minimal requirements - """ - try: - with open(conf, "r", encoding="utf-8") as f: - for line in f: - match = re.match(r"^\s*needs_sphinx\s*=\s*[\'\"]([\d\.]+)[\'\"]", line) - if match: - self.min_version = parse_version(match.group(1)) - break - except IOError: - sys.exit(f"Can't open {conf}") - - if not self.min_version: - sys.exit(f"Can't get needs_sphinx version from {conf}") - - self.virtenv_dir = self.virtenv_prefix[0] + "latest" - - sphinx = self.get_sphinx_fname() - if not sphinx: - self.need_sphinx = 1 - return - - self.cur_version = self.get_sphinx_version(sphinx) - if not self.cur_version: - sys.exit(f"{sphinx} didn't return its version") - - if self.cur_version < self.min_version: - curver = ver_str(self.cur_version) - minver = ver_str(self.min_version) - - print(f"ERROR: Sphinx version is {curver}. It should be >= {minver}") - self.need_sphinx = 1 - return - - # On version check mode, just assume Sphinx has all mandatory deps - if self.version_check and self.cur_version >= RECOMMENDED_VERSION: - sys.exit(0) - - def catcheck(self, filename): - """ - Reads a file if it exists, returning as string. - If not found, returns an empty string. - """ - if os.path.exists(filename): - with open(filename, "r", encoding="utf-8") as f: - return f.read().strip() - return "" - - def get_system_release(self): - """ - Determine the system type. There's no unique way that would work - with all distros with a minimal package install. So, several - methods are used here. - - By default, it will use lsb_release function. If not available, it will - fail back to reading the known different places where the distro name - is stored. - - Several modern distros now have /etc/os-release, which usually have - a decent coverage. - """ - - system_release = "" - - if self.which("lsb_release"): - result = self.run(["lsb_release", "-d"], capture_output=True, text=True) - system_release = result.stdout.replace("Description:", "").strip() - - release_files = [ - "/etc/system-release", - "/etc/redhat-release", - "/etc/lsb-release", - "/etc/gentoo-release", - ] - - if not system_release: - for f in release_files: - system_release = self.catcheck(f) - if system_release: - break - - # This seems more common than LSB these days - if not system_release: - os_var = {} - try: - with open("/etc/os-release", "r", encoding="utf-8") as f: - for line in f: - match = re.match(r"^([\w\d\_]+)=\"?([^\"]*)\"?\n", line) - if match: - os_var[match.group(1)] = match.group(2) - - system_release = os_var.get("NAME", "") - if "VERSION_ID" in os_var: - system_release += " " + os_var["VERSION_ID"] - elif "VERSION" in os_var: - system_release += " " + os_var["VERSION"] - except IOError: - pass - - if not system_release: - system_release = self.catcheck("/etc/issue") - - system_release = system_release.strip() - - return system_release - -class SphinxDependencyChecker(MissingCheckers): - """ - Main class for checking Sphinx documentation build dependencies. - - - Check for missing system packages; - - Check for missing Python modules; - - Check for missing LaTeX packages needed by PDF generation; - - Propose Sphinx install via Python Virtual environment; - - Propose Sphinx install via distro-specific package install. - """ - def __init__(self, args): - """Initialize checker variables""" - - # List of required texlive packages on Fedora and OpenSuse - texlive = { - "amsfonts.sty": "texlive-amsfonts", - "amsmath.sty": "texlive-amsmath", - "amssymb.sty": "texlive-amsfonts", - "amsthm.sty": "texlive-amscls", - "anyfontsize.sty": "texlive-anyfontsize", - "atbegshi.sty": "texlive-oberdiek", - "bm.sty": "texlive-tools", - "capt-of.sty": "texlive-capt-of", - "cmap.sty": "texlive-cmap", - "ctexhook.sty": "texlive-ctex", - "ecrm1000.tfm": "texlive-ec", - "eqparbox.sty": "texlive-eqparbox", - "eu1enc.def": "texlive-euenc", - "fancybox.sty": "texlive-fancybox", - "fancyvrb.sty": "texlive-fancyvrb", - "float.sty": "texlive-float", - "fncychap.sty": "texlive-fncychap", - "footnote.sty": "texlive-mdwtools", - "framed.sty": "texlive-framed", - "luatex85.sty": "texlive-luatex85", - "multirow.sty": "texlive-multirow", - "needspace.sty": "texlive-needspace", - "palatino.sty": "texlive-psnfss", - "parskip.sty": "texlive-parskip", - "polyglossia.sty": "texlive-polyglossia", - "tabulary.sty": "texlive-tabulary", - "threeparttable.sty": "texlive-threeparttable", - "titlesec.sty": "texlive-titlesec", - "ucs.sty": "texlive-ucs", - "upquote.sty": "texlive-upquote", - "wrapfig.sty": "texlive-wrapfig", - } - - super().__init__(args, texlive) - - self.need_pip = False - self.rec_sphinx_upgrade = 0 - - self.system_release = self.get_system_release() - self.activate_cmd = "" - - # Some distros may not have a Sphinx shipped package compatible with - # our minimal requirements - self.package_supported = True - - # Recommend a new python version - self.recommend_python = None - - # Certain hints are meant to be shown only once - self.distro_msg = None - - self.latest_avail_ver = (0, 0, 0) - self.venv_ver = (0, 0, 0) - - prefix = os.environ.get("srctree", ".") + "/" - - self.conf = prefix + "Documentation/conf.py" - self.requirement_file = prefix + "Documentation/sphinx/requirements.txt" - - def get_install_progs(self, progs, cmd, extra=None): - """ - Check for missing dependencies using the provided program mapping. - - The actual distro-specific programs are mapped via progs argument. - """ - install = self.deps.check_missing(progs) - - if self.verbose_warn_install: - self.deps.warn_install() - - if not install: - return - - if cmd: - if self.verbose_warn_install: - msg = "You should run:" - else: - msg = "" - - if extra: - msg += "\n\t" + extra.replace("\n", "\n\t") - - return(msg + "\n\tsudo " + cmd + " " + install) - - return None - - # - # Distro-specific hints methods - # - - def give_debian_hints(self): - """ - Provide package installation hints for Debian-based distros. - """ - progs = { - "Pod::Usage": "perl-modules", - "convert": "imagemagick", - "dot": "graphviz", - "ensurepip": "python3-venv", - "python-sphinx": "python3-sphinx", - "rsvg-convert": "librsvg2-bin", - "virtualenv": "virtualenv", - "xelatex": "texlive-xetex", - "yaml": "python3-yaml", - } - - if self.pdf: - pdf_pkgs = { - "fonts-dejavu": [ - "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", - ], - "fonts-noto-cjk": [ - "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc", - "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", - "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc", - ], - "tex-gyre": [ - "/usr/share/texmf/tex/latex/tex-gyre/tgtermes.sty" - ], - "texlive-fonts-recommended": [ - "/usr/share/texlive/texmf-dist/fonts/tfm/adobe/zapfding/pzdr.tfm", - ], - "texlive-lang-chinese": [ - "/usr/share/texlive/texmf-dist/tex/latex/ctex/ctexhook.sty", - ], - } - - for package, files in pdf_pkgs.items(): - self.check_missing_file(files, package, DepManager.PDF_MANDATORY) - - self.check_program("dvipng", DepManager.PDF_MANDATORY) - - if not self.distro_msg: - self.distro_msg = \ - "Note: ImageMagick is broken on some distros, affecting PDF output. For more details:\n" \ - "\thttps://askubuntu.com/questions/1158894/imagemagick-still-broken-using-with-usr-bin-convert" - - return self.get_install_progs(progs, "apt-get install") - - def give_redhat_hints(self): - """ - Provide package installation hints for RedHat-based distros - (Fedora, RHEL and RHEL-based variants). - """ - progs = { - "Pod::Usage": "perl-Pod-Usage", - "convert": "ImageMagick", - "dot": "graphviz", - "python-sphinx": "python3-sphinx", - "rsvg-convert": "librsvg2-tools", - "virtualenv": "python3-virtualenv", - "xelatex": "texlive-xetex-bin", - "yaml": "python3-pyyaml", - } - - fedora_tex_pkgs = [ - "dejavu-sans-fonts", - "dejavu-sans-mono-fonts", - "dejavu-serif-fonts", - "texlive-collection-fontsrecommended", - "texlive-collection-latex", - "texlive-xecjk", - ] - - fedora = False - rel = None - - match = re.search(r"(release|Linux)\s+(\d+)", self.system_release) - if match: - rel = int(match.group(2)) - - if not rel: - print("Couldn't identify release number") - noto_sans_redhat = None - self.pdf = False - elif re.search("Fedora", self.system_release): - # Fedora 38 and upper use this CJK font - - noto_sans_redhat = "google-noto-sans-cjk-fonts" - fedora = True - else: - # Almalinux, CentOS, RHEL, ... - - # at least up to version 9 (and Fedora < 38), that's the CJK font - noto_sans_redhat = "google-noto-sans-cjk-ttc-fonts" - - progs["virtualenv"] = "python-virtualenv" - - if not rel or rel < 8: - print("ERROR: Distro not supported. Too old?") - return - - # RHEL 8 uses Python 3.6, which is not compatible with - # the build system anymore. Suggest Python 3.11 - if rel == 8: - self.check_program("python3.9", DepManager.SYSTEM_MANDATORY) - progs["python3.9"] = "python39" - progs["yaml"] = "python39-pyyaml" - - self.recommend_python = True - - # There's no python39-sphinx package. Only pip is supported - self.package_supported = False - - if not self.distro_msg: - self.distro_msg = \ - "Note: RHEL-based distros typically require extra repositories.\n" \ - "For most, enabling epel and crb are enough:\n" \ - "\tsudo dnf install -y epel-release\n" \ - "\tsudo dnf config-manager --set-enabled crb\n" \ - "Yet, some may have other required repositories. Those commands could be useful:\n" \ - "\tsudo dnf repolist all\n" \ - "\tsudo dnf repoquery --available --info \n" \ - "\tsudo dnf config-manager --set-enabled '*' # enable all - probably not what you want" - - if self.pdf: - pdf_pkgs = [ - "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc", - "/usr/share/fonts/google-noto-sans-cjk-fonts/NotoSansCJK-Regular.ttc", - ] - - self.check_missing_file(pdf_pkgs, noto_sans_redhat, DepManager.PDF_MANDATORY) - - self.check_rpm_missing(fedora_tex_pkgs, DepManager.PDF_MANDATORY) - - self.check_missing_tex(DepManager.PDF_MANDATORY) - - # There's no texlive-ctex on RHEL 8 repositories. This will - # likely affect CJK pdf build only. - if not fedora and rel == 8: - self.deps.del_package("texlive-ctex") - - return self.get_install_progs(progs, "dnf install") - - def give_opensuse_hints(self): - """ - Provide package installation hints for openSUSE-based distros - (Leap and Tumbleweed). - """ - progs = { - "Pod::Usage": "perl-Pod-Usage", - "convert": "ImageMagick", - "dot": "graphviz", - "python-sphinx": "python3-sphinx", - "virtualenv": "python3-virtualenv", - "xelatex": "texlive-xetex-bin texlive-dejavu", - "yaml": "python3-pyyaml", - } - - suse_tex_pkgs = [ - "texlive-babel-english", - "texlive-caption", - "texlive-colortbl", - "texlive-courier", - "texlive-dvips", - "texlive-helvetic", - "texlive-makeindex", - "texlive-metafont", - "texlive-metapost", - "texlive-palatino", - "texlive-preview", - "texlive-times", - "texlive-zapfchan", - "texlive-zapfding", - ] - - progs["latexmk"] = "texlive-latexmk-bin" - - match = re.search(r"(Leap)\s+(\d+).(\d)", self.system_release) - if match: - rel = int(match.group(2)) - - # Leap 15.x uses Python 3.6, which is not compatible with - # the build system anymore. Suggest Python 3.11 - if rel == 15: - if not self.which(self.python_cmd): - self.check_program("python3.11", DepManager.SYSTEM_MANDATORY) - progs["python3.11"] = "python311" - self.recommend_python = True - - progs.update({ - "python-sphinx": "python311-Sphinx python311-Sphinx-latex", - "virtualenv": "python311-virtualenv", - "yaml": "python311-PyYAML", - }) - else: - # Tumbleweed defaults to Python 3.11 - - progs.update({ - "python-sphinx": "python313-Sphinx python313-Sphinx-latex", - "virtualenv": "python313-virtualenv", - "yaml": "python313-PyYAML", - }) - - # FIXME: add support for installing CJK fonts - # - # I tried hard, but was unable to find a way to install - # "Noto Sans CJK SC" on openSUSE - - if self.pdf: - self.check_rpm_missing(suse_tex_pkgs, DepManager.PDF_MANDATORY) - if self.pdf: - self.check_missing_tex() - - return self.get_install_progs(progs, "zypper install --no-recommends") - - def give_mageia_hints(self): - """ - Provide package installation hints for Mageia and OpenMandriva. - """ - progs = { - "Pod::Usage": "perl-Pod-Usage", - "convert": "ImageMagick", - "dot": "graphviz", - "python-sphinx": "python3-sphinx", - "rsvg-convert": "librsvg2", - "virtualenv": "python3-virtualenv", - "xelatex": "texlive", - "yaml": "python3-yaml", - } - - tex_pkgs = [ - "texlive-fontsextra", - "texlive-fonts-asian", - "fonts-ttf-dejavu", - ] - - if re.search(r"OpenMandriva", self.system_release): - packager_cmd = "dnf install" - noto_sans = "noto-sans-cjk-fonts" - tex_pkgs = [ - "texlive-collection-basic", - "texlive-collection-langcjk", - "texlive-collection-fontsextra", - "texlive-collection-fontsrecommended" - ] - - # Tested on OpenMandriva Lx 4.3 - progs["convert"] = "imagemagick" - progs["yaml"] = "python-pyyaml" - progs["python-virtualenv"] = "python-virtualenv" - progs["python-sphinx"] = "python-sphinx" - progs["xelatex"] = "texlive" - - self.check_program("python-virtualenv", DepManager.PYTHON_MANDATORY) - - # On my tests with openMandriva LX 4.0 docker image, upgraded - # to 4.3, python-virtualenv package is broken: it is missing - # ensurepip. Without it, the alternative would be to run: - # python3 -m venv --without-pip ~/sphinx_latest, but running - # pip there won't install sphinx at venv. - # - # Add a note about that. - - if not self.distro_msg: - self.distro_msg = \ - "Notes:\n"\ - "1. for venv, ensurepip could be broken, preventing its install method.\n" \ - "2. at least on OpenMandriva LX 4.3, texlive packages seem broken" - - else: - packager_cmd = "urpmi" - noto_sans = "google-noto-sans-cjk-ttc-fonts" - - progs["latexmk"] = "texlive-collection-basic" - - if self.pdf: - pdf_pkgs = [ - "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc", - "/usr/share/fonts/TTF/NotoSans-Regular.ttf", - ] - - self.check_missing_file(pdf_pkgs, noto_sans, DepManager.PDF_MANDATORY) - self.check_rpm_missing(tex_pkgs, DepManager.PDF_MANDATORY) - - return self.get_install_progs(progs, packager_cmd) - - def give_arch_linux_hints(self): - """ - Provide package installation hints for ArchLinux. - """ - progs = { - "convert": "imagemagick", - "dot": "graphviz", - "latexmk": "texlive-core", - "rsvg-convert": "extra/librsvg", - "virtualenv": "python-virtualenv", - "xelatex": "texlive-xetex", - "yaml": "python-yaml", - } - - archlinux_tex_pkgs = [ - "texlive-basic", - "texlive-binextra", - "texlive-core", - "texlive-fontsrecommended", - "texlive-langchinese", - "texlive-langcjk", - "texlive-latexextra", - "ttf-dejavu", - ] - - if self.pdf: - self.check_pacman_missing(archlinux_tex_pkgs, - DepManager.PDF_MANDATORY) - - self.check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc"], - "noto-fonts-cjk", - DepManager.PDF_MANDATORY) - - - return self.get_install_progs(progs, "pacman -S") - - def give_gentoo_hints(self): - """ - Provide package installation hints for Gentoo. - """ - texlive_deps = [ - "dev-texlive/texlive-fontsrecommended", - "dev-texlive/texlive-latexextra", - "dev-texlive/texlive-xetex", - "media-fonts/dejavu", - ] - - progs = { - "convert": "media-gfx/imagemagick", - "dot": "media-gfx/graphviz", - "rsvg-convert": "gnome-base/librsvg", - "virtualenv": "dev-python/virtualenv", - "xelatex": " ".join(texlive_deps), - "yaml": "dev-python/pyyaml", - "python-sphinx": "dev-python/sphinx", - } - - if self.pdf: - pdf_pkgs = { - "media-fonts/dejavu": [ - "/usr/share/fonts/dejavu/DejaVuSans.ttf", - ], - "media-fonts/noto-cjk": [ - "/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf", - "/usr/share/fonts/noto-cjk/NotoSerifCJK-Regular.ttc", - ], - } - for package, files in pdf_pkgs.items(): - self.check_missing_file(files, package, DepManager.PDF_MANDATORY) - - # Handling dependencies is a nightmare, as Gentoo refuses to emerge - # some packages if there's no package.use file describing them. - # To make it worse, compilation flags shall also be present there - # for some packages. If USE is not perfect, error/warning messages - # like those are shown: - # - # !!! The following binary packages have been ignored due to non matching USE: - # - # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_13 qt6 svg - # =media-gfx/graphviz-12.2.1-r1 X pdf python_single_target_python3_12 -python_single_target_python3_13 qt6 svg - # =media-gfx/graphviz-12.2.1-r1 X pdf qt6 svg - # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 qt6 svg - # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 python_single_target_python3_12 -python_single_target_python3_13 qt6 svg - # =media-fonts/noto-cjk-20190416 X - # =app-text/texlive-core-2024-r1 X cjk -xetex - # =app-text/texlive-core-2024-r1 X -xetex - # =app-text/texlive-core-2024-r1 -xetex - # =dev-libs/zziplib-0.13.79-r1 sdl - # - # And will ignore such packages, installing the remaining ones. That - # affects mostly the image extension and PDF generation. - - # Package dependencies and the minimal needed args: - portages = { - "graphviz": "media-gfx/graphviz", - "imagemagick": "media-gfx/imagemagick", - "media-libs": "media-libs/harfbuzz icu", - "media-fonts": "media-fonts/noto-cjk", - "texlive": "app-text/texlive-core xetex", - "zziblib": "dev-libs/zziplib sdl", - } - - extra_cmds = "" - if not self.distro_msg: - self.distro_msg = "Note: Gentoo requires package.use to be adjusted before emerging packages" - - use_base = "/etc/portage/package.use" - files = glob(f"{use_base}/*") - - for fname, portage in portages.items(): - install = False - - while install is False: - if not files: - # No files under package.usage. Install all - install = True - break - - args = portage.split(" ") - - name = args.pop(0) - - cmd = ["grep", "-l", "-E", rf"^{name}\b" ] + files - result = self.run(cmd, stdout=subprocess.PIPE, text=True) - if result.returncode or not result.stdout.strip(): - # File containing portage name not found - install = True - break - - # Ensure that needed USE flags are present - if args: - match_fname = result.stdout.strip() - with open(match_fname, 'r', encoding='utf8', - errors='backslashreplace') as fp: - for line in fp: - for arg in args: - if arg.startswith("-"): - continue - - if not re.search(rf"\s*{arg}\b", line): - # Needed file argument not found - install = True - break - - # Everything looks ok, don't install - break - - # emit a code to setup missing USE - if install: - extra_cmds += (f"sudo su -c 'echo \"{portage}\" > {use_base}/{fname}'\n") - - # Now, we can use emerge and let it respect USE - return self.get_install_progs(progs, - "emerge --ask --changed-use --binpkg-respect-use=y", - extra_cmds) - - def get_install(self): - """ - OS-specific hints logic. Seeks for a hinter. If found, use it to - provide package-manager specific install commands. - - Otherwise, outputs install instructions for the meta-packages. - - Returns a string with the command to be executed to install the - the needed packages, if distro found. Otherwise, return just a - list of packages that require installation. - """ - os_hints = { - re.compile("Red Hat Enterprise Linux"): self.give_redhat_hints, - re.compile("Fedora"): self.give_redhat_hints, - re.compile("AlmaLinux"): self.give_redhat_hints, - re.compile("Amazon Linux"): self.give_redhat_hints, - re.compile("CentOS"): self.give_redhat_hints, - re.compile("openEuler"): self.give_redhat_hints, - re.compile("Oracle Linux Server"): self.give_redhat_hints, - re.compile("Rocky Linux"): self.give_redhat_hints, - re.compile("Springdale Open Enterprise"): self.give_redhat_hints, - - re.compile("Ubuntu"): self.give_debian_hints, - re.compile("Debian"): self.give_debian_hints, - re.compile("Devuan"): self.give_debian_hints, - re.compile("Kali"): self.give_debian_hints, - re.compile("Mint"): self.give_debian_hints, - - re.compile("openSUSE"): self.give_opensuse_hints, - - re.compile("Mageia"): self.give_mageia_hints, - re.compile("OpenMandriva"): self.give_mageia_hints, - - re.compile("Arch Linux"): self.give_arch_linux_hints, - re.compile("Gentoo"): self.give_gentoo_hints, - } - - # If the OS is detected, use per-OS hint logic - for regex, os_hint in os_hints.items(): - if regex.search(self.system_release): - return os_hint() - - # - # Fall-back to generic hint code for other distros - # That's far from ideal, specially for LaTeX dependencies. - # - progs = {"sphinx-build": "sphinx"} - if self.pdf: - self.check_missing_tex() - - self.distro_msg = \ - f"I don't know distro {self.system_release}.\n" \ - "So, I can't provide you a hint with the install procedure.\n" \ - "There are likely missing dependencies." - - return self.get_install_progs(progs, None) - - # - # Common dependencies - # - def deactivate_help(self): - """ - Print a helper message to disable a virtual environment. - """ - - print("\n If you want to exit the virtualenv, you can use:") - print("\tdeactivate") - - def get_virtenv(self): - """ - Give a hint about how to activate an already-existing virtual - environment containing sphinx-build. - - Returns a tuble with (activate_cmd_path, sphinx_version) with - the newest available virtual env. - """ - - cwd = os.getcwd() - - activates = [] - - # Add all sphinx prefixes with possible version numbers - for p in self.virtenv_prefix: - activates += glob(f"{cwd}/{p}[0-9]*/bin/activate") - - activates.sort(reverse=True, key=str.lower) - - # Place sphinx_latest first, if it exists - for p in self.virtenv_prefix: - activates = glob(f"{cwd}/{p}*latest/bin/activate") + activates - - ver = (0, 0, 0) - for f in activates: - # Discard too old Sphinx virtual environments - match = re.search(r"(\d+)\.(\d+)\.(\d+)", f) - if match: - ver = (int(match.group(1)), int(match.group(2)), int(match.group(3))) - - if ver < self.min_version: - continue - - sphinx_cmd = f.replace("activate", "sphinx-build") - if not os.path.isfile(sphinx_cmd): - continue - - ver = self.get_sphinx_version(sphinx_cmd) - - if not ver: - venv_dir = f.replace("/bin/activate", "") - print(f"Warning: virtual environment {venv_dir} is not working.\n" \ - "Python version upgrade? Remove it with:\n\n" \ - "\trm -rf {venv_dir}\n\n") - else: - if self.need_sphinx and ver >= self.min_version: - return (f, ver) - elif parse_version(ver) > self.cur_version: - return (f, ver) - - return ("", ver) - - def recommend_sphinx_upgrade(self): - """ - Check if Sphinx needs to be upgraded. - - Returns a tuple with the higest available Sphinx version if found. - Otherwise, returns None to indicate either that no upgrade is needed - or no venv was found. - """ - - # Avoid running sphinx-builds from venv if cur_version is good - if self.cur_version and self.cur_version >= RECOMMENDED_VERSION: - self.latest_avail_ver = self.cur_version - return None - - # Get the highest version from sphinx_*/bin/sphinx-build and the - # corresponding command to activate the venv/virtenv - self.activate_cmd, self.venv_ver = self.get_virtenv() - - # Store the highest version from Sphinx existing virtualenvs - if self.activate_cmd and self.venv_ver > self.cur_version: - self.latest_avail_ver = self.venv_ver - else: - if self.cur_version: - self.latest_avail_ver = self.cur_version - else: - self.latest_avail_ver = (0, 0, 0) - - # As we don't know package version of Sphinx, and there's no - # virtual environments, don't check if upgrades are needed - if not self.virtualenv: - if not self.latest_avail_ver: - return None - - return self.latest_avail_ver - - # Either there are already a virtual env or a new one should be created - self.need_pip = True - - if not self.latest_avail_ver: - return None - - # Return if the reason is due to an upgrade or not - if self.latest_avail_ver != (0, 0, 0): - if self.latest_avail_ver < RECOMMENDED_VERSION: - self.rec_sphinx_upgrade = 1 - - return self.latest_avail_ver - - def recommend_package(self): - """ - Recommend installing Sphinx as a distro-specific package. - """ - - print("\n2) As a package with:") - - old_need = self.deps.need - old_optional = self.deps.optional - - self.pdf = False - self.deps.optional = 0 - old_verbose = self.verbose_warn_install - self.verbose_warn_install = 0 - - self.deps.clear_deps() - - self.deps.add_package("python-sphinx", DepManager.PYTHON_MANDATORY) - - cmd = self.get_install() - if cmd: - print(cmd) - - self.deps.need = old_need - self.deps.optional = old_optional - self.verbose_warn_install = old_verbose - - def recommend_sphinx_version(self, virtualenv_cmd): - """ - Provide recommendations for installing or upgrading Sphinx based - on current version. - - The logic here is complex, as it have to deal with different versions: - - - minimal supported version; - - minimal PDF version; - - recommended version. - - It also needs to work fine with both distro's package and - venv/virtualenv - """ - - if self.recommend_python: - cur_ver = sys.version_info[:3] - if cur_ver < MIN_PYTHON_VERSION: - print(f"\nPython version {cur_ver} is incompatible with doc build.\n" \ - "Please upgrade it and re-run.\n") - return - - # Version is OK. Nothing to do. - if self.cur_version != (0, 0, 0) and self.cur_version >= RECOMMENDED_VERSION: - return - - if self.latest_avail_ver: - latest_avail_ver = ver_str(self.latest_avail_ver) - - if not self.need_sphinx: - # sphinx-build is present and its version is >= $min_version - - # only recommend enabling a newer virtenv version if makes sense. - if self.latest_avail_ver and self.latest_avail_ver > self.cur_version: - print(f"\nYou may also use the newer Sphinx version {latest_avail_ver} with:") - if f"{self.virtenv_prefix}" in os.getcwd(): - print("\tdeactivate") - print(f"\t. {self.activate_cmd}") - self.deactivate_help() - return - - if self.latest_avail_ver and self.latest_avail_ver >= RECOMMENDED_VERSION: - return - - if not self.virtualenv: - # No sphinx either via package or via virtenv. As we can't - # Compare the versions here, just return, recommending the - # user to install it from the package distro. - if not self.latest_avail_ver or self.latest_avail_ver == (0, 0, 0): - return - - # User doesn't want a virtenv recommendation, but he already - # installed one via virtenv with a newer version. - # So, print commands to enable it - if self.latest_avail_ver > self.cur_version: - print(f"\nYou may also use the Sphinx virtualenv version {latest_avail_ver} with:") - if f"{self.virtenv_prefix}" in os.getcwd(): - print("\tdeactivate") - print(f"\t. {self.activate_cmd}") - self.deactivate_help() - return - print("\n") - else: - if self.need_sphinx: - self.deps.need += 1 - - # Suggest newer versions if current ones are too old - if self.latest_avail_ver and self.latest_avail_ver >= self.min_version: - if self.latest_avail_ver >= RECOMMENDED_VERSION: - print(f"\nNeed to activate Sphinx (version {latest_avail_ver}) on virtualenv with:") - print(f"\t. {self.activate_cmd}") - self.deactivate_help() - return - - # Version is above the minimal required one, but may be - # below the recommended one. So, print warnings/notes - if self.latest_avail_ver < RECOMMENDED_VERSION: - print(f"Warning: It is recommended at least Sphinx version {RECOMMENDED_VERSION}.") - - # At this point, either it needs Sphinx or upgrade is recommended, - # both via pip - - if self.rec_sphinx_upgrade: - if not self.virtualenv: - print("Instead of install/upgrade Python Sphinx pkg, you could use pip/pypi with:\n\n") - else: - print("To upgrade Sphinx, use:\n\n") - else: - print("\nSphinx needs to be installed either:\n1) via pip/pypi with:\n") - - if not virtualenv_cmd: - print(" Currently not possible.\n") - print(" Please upgrade Python to a newer version and run this script again") - else: - print(f"\t{virtualenv_cmd} {self.virtenv_dir}") - print(f"\t. {self.virtenv_dir}/bin/activate") - print(f"\tpip install -r {self.requirement_file}") - self.deactivate_help() - - if self.package_supported: - self.recommend_package() - - print("\n" \ - " Please note that Sphinx currentlys produce false-positive\n" \ - " warnings when the same name is used for more than one type (functions,\n" \ - " structs, enums,...). This is known Sphinx bug. For more details, see:\n" \ - "\thttps://github.com/sphinx-doc/sphinx/pull/8313") - - def check_needs(self): - """ - Main method that checks needed dependencies and provides - recommendations. - """ - self.python_cmd = sys.executable - - # Check if Sphinx is already accessible from current environment - self.check_sphinx(self.conf) - - if self.system_release: - print(f"Detected OS: {self.system_release}.") - else: - print("Unknown OS") - if self.cur_version != (0, 0, 0): - ver = ver_str(self.cur_version) - print(f"Sphinx version: {ver}\n") - - # Check the type of virtual env, depending on Python version - virtualenv_cmd = None - - if sys.version_info < MIN_PYTHON_VERSION: - min_ver = ver_str(MIN_PYTHON_VERSION) - print(f"ERROR: at least python {min_ver} is required to build the kernel docs") - self.need_sphinx = 1 - - self.venv_ver = self.recommend_sphinx_upgrade() - - if self.need_pip: - if sys.version_info < MIN_PYTHON_VERSION: - self.need_pip = False - print("Warning: python version is not supported.") - else: - virtualenv_cmd = f"{self.python_cmd} -m venv" - self.check_python_module("ensurepip") - - # Check for needed programs/tools - self.check_perl_module("Pod::Usage", DepManager.SYSTEM_MANDATORY) - - self.check_program("make", DepManager.SYSTEM_MANDATORY) - self.check_program("which", DepManager.SYSTEM_MANDATORY) - - self.check_program("dot", DepManager.SYSTEM_OPTIONAL) - self.check_program("convert", DepManager.SYSTEM_OPTIONAL) - - self.check_python_module("yaml") - - if self.pdf: - self.check_program("xelatex", DepManager.PDF_MANDATORY) - self.check_program("rsvg-convert", DepManager.PDF_MANDATORY) - self.check_program("latexmk", DepManager.PDF_MANDATORY) - - # Do distro-specific checks and output distro-install commands - cmd = self.get_install() - if cmd: - print(cmd) - - # If distro requires some special instructions, print here. - # Please notice that get_install() needs to be called first. - if self.distro_msg: - print("\n" + self.distro_msg) - - if not self.python_cmd: - if self.need == 1: - sys.exit("Can't build as 1 mandatory dependency is missing") - elif self.need: - sys.exit(f"Can't build as {self.need} mandatory dependencies are missing") - - # Check if sphinx-build is called sphinx-build-3 - if self.need_symlink: - sphinx_path = self.which("sphinx-build-3") - if sphinx_path: - print(f"\tsudo ln -sf {sphinx_path} /usr/bin/sphinx-build\n") - - self.recommend_sphinx_version(virtualenv_cmd) - print("") - - if not self.deps.optional: - print("All optional dependencies are met.") - - if self.deps.need == 1: - sys.exit("Can't build as 1 mandatory dependency is missing") - elif self.deps.need: - sys.exit(f"Can't build as {self.deps.need} mandatory dependencies are missing") - - print("Needed package dependencies are met.") - -DESCRIPTION = """ -Process some flags related to Sphinx installation and documentation build. -""" - - -def main(): - """Main function""" - parser = argparse.ArgumentParser(description=DESCRIPTION) - - parser.add_argument( - "--no-virtualenv", - action="store_false", - dest="virtualenv", - help="Recommend installing Sphinx instead of using a virtualenv", - ) - - parser.add_argument( - "--no-pdf", - action="store_false", - dest="pdf", - help="Don't check for dependencies required to build PDF docs", - ) - - parser.add_argument( - "--version-check", - action="store_true", - dest="version_check", - help="If version is compatible, don't check for missing dependencies", - ) - - args = parser.parse_args() - - checker = SphinxDependencyChecker(args) - - checker.check_python() - checker.check_needs() - -# Call main if not used as module -if __name__ == "__main__": - main() diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install new file mode 100755 index 000000000000..954ed3dc0645 --- /dev/null +++ b/tools/docs/sphinx-pre-install @@ -0,0 +1,1621 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (c) 2017-2025 Mauro Carvalho Chehab +# +# pylint: disable=C0103,C0114,C0115,C0116,C0301,C0302 +# pylint: disable=R0902,R0904,R0911,R0912,R0914,R0915,R1705,R1710,E1121 + +# Note: this script requires at least Python 3.6 to run. +# Don't add changes not compatible with it, it is meant to report +# incompatible python versions. + +""" +Dependency checker for Sphinx documentation Kernel build. + +This module provides tools to check for all required dependencies needed to +build documentation using Sphinx, including system packages, Python modules +and LaTeX packages for PDF generation. + +It detect packages for a subset of Linux distributions used by Kernel +maintainers, showing hints and missing dependencies. + +The main class SphinxDependencyChecker handles the dependency checking logic +and provides recommendations for installing missing packages. It supports both +system package installations and Python virtual environments. By default, +system pacage install is recommended. +""" + +import argparse +import os +import re +import subprocess +import sys +from glob import glob + + +def parse_version(version): + """Convert a major.minor.patch version into a tuple""" + return tuple(int(x) for x in version.split(".")) + + +def ver_str(version): + """Returns a version tuple as major.minor.patch""" + + return ".".join([str(x) for x in version]) + + +RECOMMENDED_VERSION = parse_version("3.4.3") +MIN_PYTHON_VERSION = parse_version("3.7") + + +class DepManager: + """ + Manage package dependencies. There are three types of dependencies: + + - System: dependencies required for docs build; + - Python: python dependencies for a native distro Sphinx install; + - PDF: dependencies needed by PDF builds. + + Each dependency can be mandatory or optional. Not installing an optional + dependency won't break the build, but will cause degradation at the + docs output. + """ + + # Internal types of dependencies. Don't use them outside DepManager class. + _SYS_TYPE = 0 + _PHY_TYPE = 1 + _PDF_TYPE = 2 + + # Dependencies visible outside the class. + # The keys are tuple with: (type, is_mandatory flag). + # + # Currently we're not using all optional dep types. Yet, we'll keep all + # possible combinations here. They're not many, and that makes easier + # if later needed and for the name() method below + + SYSTEM_MANDATORY = (_SYS_TYPE, True) + PYTHON_MANDATORY = (_PHY_TYPE, True) + PDF_MANDATORY = (_PDF_TYPE, True) + + SYSTEM_OPTIONAL = (_SYS_TYPE, False) + PYTHON_OPTIONAL = (_PHY_TYPE, False) + PDF_OPTIONAL = (_PDF_TYPE, True) + + def __init__(self, pdf): + """ + Initialize internal vars: + + - missing: missing dependencies list, containing a distro-independent + name for a missing dependency and its type. + - missing_pkg: ancillary dict containing missing dependencies in + distro namespace, organized by type. + - need: total number of needed dependencies. Never cleaned. + - optional: total number of optional dependencies. Never cleaned. + - pdf: Is PDF support enabled? + """ + self.missing = {} + self.missing_pkg = {} + self.need = 0 + self.optional = 0 + self.pdf = pdf + + @staticmethod + def name(dtype): + """ + Ancillary routine to output a warn/error message reporting + missing dependencies. + """ + if dtype[0] == DepManager._SYS_TYPE: + msg = "build" + elif dtype[0] == DepManager._PHY_TYPE: + msg = "Python" + else: + msg = "PDF" + + if dtype[1]: + return f"ERROR: {msg} mandatory deps missing" + else: + return f"Warning: {msg} optional deps missing" + + @staticmethod + def is_optional(dtype): + """Ancillary routine to report if a dependency is optional""" + return not dtype[1] + + @staticmethod + def is_pdf(dtype): + """Ancillary routine to report if a dependency is for PDF generation""" + if dtype[0] == DepManager._PDF_TYPE: + return True + + return False + + def add_package(self, package, dtype): + """ + Add a package at the self.missing() dictionary. + Doesn't update missing_pkg. + """ + is_optional = DepManager.is_optional(dtype) + self.missing[package] = dtype + if is_optional: + self.optional += 1 + else: + self.need += 1 + + def del_package(self, package): + """ + Remove a package at the self.missing() dictionary. + Doesn't update missing_pkg. + """ + if package in self.missing: + del self.missing[package] + + def clear_deps(self): + """ + Clear dependencies without changing needed/optional. + + This is an ackward way to have a separate section to recommend + a package after system main dependencies. + + TODO: rework the logic to prevent needing it. + """ + + self.missing = {} + self.missing_pkg = {} + + def check_missing(self, progs): + """ + Update self.missing_pkg, using progs dict to convert from the + agnostic package name to distro-specific one. + + Returns an string with the packages to be installed, sorted and + with eventual duplicates removed. + """ + + self.missing_pkg = {} + + for prog, dtype in sorted(self.missing.items()): + # At least on some LTS distros like CentOS 7, texlive doesn't + # provide all packages we need. When such distros are + # detected, we have to disable PDF output. + # + # So, we need to ignore the packages that distros would + # need for LaTeX to work + if DepManager.is_pdf(dtype) and not self.pdf: + self.optional -= 1 + continue + + if not dtype in self.missing_pkg: + self.missing_pkg[dtype] = [] + + self.missing_pkg[dtype].append(progs.get(prog, prog)) + + install = [] + for dtype, pkgs in self.missing_pkg.items(): + install += pkgs + + return " ".join(sorted(set(install))) + + def warn_install(self): + """ + Emit warnings/errors related to missing packages. + """ + + output_msg = "" + + for dtype in sorted(self.missing_pkg.keys()): + progs = " ".join(sorted(set(self.missing_pkg[dtype]))) + + try: + name = DepManager.name(dtype) + output_msg += f'{name}:\t{progs}\n' + except KeyError: + raise KeyError(f"ERROR!!!: invalid dtype for {progs}: {dtype}") + + if output_msg: + print(f"\n{output_msg}") + +class AncillaryMethods: + """ + Ancillary methods that checks for missing dependencies for different + types of types, like binaries, python modules, rpm deps, etc. + """ + + @staticmethod + def which(prog): + """ + Our own implementation of which(). We could instead use + shutil.which(), but this function is simple enough. + Probably faster to use this implementation than to import shutil. + """ + for path in os.environ.get("PATH", "").split(":"): + full_path = os.path.join(path, prog) + if os.access(full_path, os.X_OK): + return full_path + + return None + + @staticmethod + def get_python_version(cmd): + """ + Get python version from a Python binary. As we need to detect if + are out there newer python binaries, we can't rely on sys.release here. + """ + + result = SphinxDependencyChecker.run([cmd, "--version"], + capture_output=True, text=True) + version = result.stdout.strip() + + match = re.search(r"(\d+\.\d+\.\d+)", version) + if match: + return parse_version(match.group(1)) + + print(f"Can't parse version {version}") + return (0, 0, 0) + + @staticmethod + def find_python(): + """ + Detect if are out there any python 3.xy version newer than the + current one. + + Note: this routine is limited to up to 2 digits for python3. We + may need to update it one day, hopefully on a distant future. + """ + patterns = [ + "python3.[0-9]", + "python3.[0-9][0-9]", + ] + + # Seek for a python binary newer than MIN_PYTHON_VERSION + for path in os.getenv("PATH", "").split(":"): + for pattern in patterns: + for cmd in glob(os.path.join(path, pattern)): + if os.path.isfile(cmd) and os.access(cmd, os.X_OK): + version = SphinxDependencyChecker.get_python_version(cmd) + if version >= MIN_PYTHON_VERSION: + return cmd + + @staticmethod + def check_python(): + """ + Check if the current python binary satisfies our minimal requirement + for Sphinx build. If not, re-run with a newer version if found. + """ + cur_ver = sys.version_info[:3] + if cur_ver >= MIN_PYTHON_VERSION: + ver = ver_str(cur_ver) + print(f"Python version: {ver}") + + # This could be useful for debugging purposes + if SphinxDependencyChecker.which("docutils"): + result = SphinxDependencyChecker.run(["docutils", "--version"], + capture_output=True, text=True) + ver = result.stdout.strip() + match = re.search(r"(\d+\.\d+\.\d+)", ver) + if match: + ver = match.group(1) + + print(f"Docutils version: {ver}") + + return + + python_ver = ver_str(cur_ver) + + new_python_cmd = SphinxDependencyChecker.find_python() + if not new_python_cmd: + print(f"ERROR: Python version {python_ver} is not spported anymore\n") + print(" Can't find a new version. This script may fail") + return + + # Restart script using the newer version + script_path = os.path.abspath(sys.argv[0]) + args = [new_python_cmd, script_path] + sys.argv[1:] + + print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") + + try: + os.execv(new_python_cmd, args) + except OSError as e: + sys.exit(f"Failed to restart with {new_python_cmd}: {e}") + + @staticmethod + def run(*args, **kwargs): + """ + Excecute a command, hiding its output by default. + Preserve comatibility with older Python versions. + """ + + capture_output = kwargs.pop('capture_output', False) + + if capture_output: + if 'stdout' not in kwargs: + kwargs['stdout'] = subprocess.PIPE + if 'stderr' not in kwargs: + kwargs['stderr'] = subprocess.PIPE + else: + if 'stdout' not in kwargs: + kwargs['stdout'] = subprocess.DEVNULL + if 'stderr' not in kwargs: + kwargs['stderr'] = subprocess.DEVNULL + + # Don't break with older Python versions + if 'text' in kwargs and sys.version_info < (3, 7): + kwargs['universal_newlines'] = kwargs.pop('text') + + return subprocess.run(*args, **kwargs) + +class MissingCheckers(AncillaryMethods): + """ + Contains some ancillary checkers for different types of binaries and + package managers. + """ + + def __init__(self, args, texlive): + """ + Initialize its internal variables + """ + self.pdf = args.pdf + self.virtualenv = args.virtualenv + self.version_check = args.version_check + self.texlive = texlive + + self.min_version = (0, 0, 0) + self.cur_version = (0, 0, 0) + + self.deps = DepManager(self.pdf) + + self.need_symlink = 0 + self.need_sphinx = 0 + + self.verbose_warn_install = 1 + + self.virtenv_dir = "" + self.install = "" + self.python_cmd = "" + + self.virtenv_prefix = ["sphinx_", "Sphinx_" ] + + def check_missing_file(self, files, package, dtype): + """ + Does the file exists? If not, add it to missing dependencies. + """ + for f in files: + if os.path.exists(f): + return + self.deps.add_package(package, dtype) + + def check_program(self, prog, dtype): + """ + Does the program exists and it is at the PATH? + If not, add it to missing dependencies. + """ + found = self.which(prog) + if found: + return found + + self.deps.add_package(prog, dtype) + + return None + + def check_perl_module(self, prog, dtype): + """ + Does perl have a dependency? Is it available? + If not, add it to missing dependencies. + + Right now, we still need Perl for doc build, as it is required + by some tools called at docs or kernel build time, like: + + scripts/documentation-file-ref-check + + Also, checkpatch is on Perl. + """ + + # While testing with lxc download template, one of the + # distros (Oracle) didn't have perl - nor even an option to install + # before installing oraclelinux-release-el9 package. + # + # Check it before running an error. If perl is not there, + # add it as a mandatory package, as some parts of the doc builder + # needs it. + if not self.which("perl"): + self.deps.add_package("perl", DepManager.SYSTEM_MANDATORY) + self.deps.add_package(prog, dtype) + return + + try: + self.run(["perl", f"-M{prog}", "-e", "1"], check=True) + except subprocess.CalledProcessError: + self.deps.add_package(prog, dtype) + + def check_python_module(self, module, is_optional=False): + """ + Does a python module exists outside venv? If not, add it to missing + dependencies. + """ + if is_optional: + dtype = DepManager.PYTHON_OPTIONAL + else: + dtype = DepManager.PYTHON_MANDATORY + + try: + self.run([self.python_cmd, "-c", f"import {module}"], check=True) + except subprocess.CalledProcessError: + self.deps.add_package(module, dtype) + + def check_rpm_missing(self, pkgs, dtype): + """ + Does a rpm package exists? If not, add it to missing dependencies. + """ + for prog in pkgs: + try: + self.run(["rpm", "-q", prog], check=True) + except subprocess.CalledProcessError: + self.deps.add_package(prog, dtype) + + def check_pacman_missing(self, pkgs, dtype): + """ + Does a pacman package exists? If not, add it to missing dependencies. + """ + for prog in pkgs: + try: + self.run(["pacman", "-Q", prog], check=True) + except subprocess.CalledProcessError: + self.deps.add_package(prog, dtype) + + def check_missing_tex(self, is_optional=False): + """ + Does a LaTeX package exists? If not, add it to missing dependencies. + """ + if is_optional: + dtype = DepManager.PDF_OPTIONAL + else: + dtype = DepManager.PDF_MANDATORY + + kpsewhich = self.which("kpsewhich") + for prog, package in self.texlive.items(): + + # If kpsewhich is not there, just add it to deps + if not kpsewhich: + self.deps.add_package(package, dtype) + continue + + # Check if the package is needed + try: + result = self.run( + [kpsewhich, prog], stdout=subprocess.PIPE, text=True, check=True + ) + + # Didn't find. Add it + if not result.stdout.strip(): + self.deps.add_package(package, dtype) + + except subprocess.CalledProcessError: + # kpsewhich returned an error. Add it, just in case + self.deps.add_package(package, dtype) + + def get_sphinx_fname(self): + """ + Gets the binary filename for sphinx-build. + """ + if "SPHINXBUILD" in os.environ: + return os.environ["SPHINXBUILD"] + + fname = "sphinx-build" + if self.which(fname): + return fname + + fname = "sphinx-build-3" + if self.which(fname): + self.need_symlink = 1 + return fname + + return "" + + def get_sphinx_version(self, cmd): + """ + Gets sphinx-build version. + """ + try: + result = self.run([cmd, "--version"], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, check=True) + except (subprocess.CalledProcessError, FileNotFoundError): + return None + + for line in result.stdout.split("\n"): + match = re.match(r"^sphinx-build\s+([\d\.]+)(?:\+(?:/[\da-f]+)|b\d+)?\s*$", line) + if match: + return parse_version(match.group(1)) + + match = re.match(r"^Sphinx.*\s+([\d\.]+)\s*$", line) + if match: + return parse_version(match.group(1)) + + def check_sphinx(self, conf): + """ + Checks Sphinx minimal requirements + """ + try: + with open(conf, "r", encoding="utf-8") as f: + for line in f: + match = re.match(r"^\s*needs_sphinx\s*=\s*[\'\"]([\d\.]+)[\'\"]", line) + if match: + self.min_version = parse_version(match.group(1)) + break + except IOError: + sys.exit(f"Can't open {conf}") + + if not self.min_version: + sys.exit(f"Can't get needs_sphinx version from {conf}") + + self.virtenv_dir = self.virtenv_prefix[0] + "latest" + + sphinx = self.get_sphinx_fname() + if not sphinx: + self.need_sphinx = 1 + return + + self.cur_version = self.get_sphinx_version(sphinx) + if not self.cur_version: + sys.exit(f"{sphinx} didn't return its version") + + if self.cur_version < self.min_version: + curver = ver_str(self.cur_version) + minver = ver_str(self.min_version) + + print(f"ERROR: Sphinx version is {curver}. It should be >= {minver}") + self.need_sphinx = 1 + return + + # On version check mode, just assume Sphinx has all mandatory deps + if self.version_check and self.cur_version >= RECOMMENDED_VERSION: + sys.exit(0) + + def catcheck(self, filename): + """ + Reads a file if it exists, returning as string. + If not found, returns an empty string. + """ + if os.path.exists(filename): + with open(filename, "r", encoding="utf-8") as f: + return f.read().strip() + return "" + + def get_system_release(self): + """ + Determine the system type. There's no unique way that would work + with all distros with a minimal package install. So, several + methods are used here. + + By default, it will use lsb_release function. If not available, it will + fail back to reading the known different places where the distro name + is stored. + + Several modern distros now have /etc/os-release, which usually have + a decent coverage. + """ + + system_release = "" + + if self.which("lsb_release"): + result = self.run(["lsb_release", "-d"], capture_output=True, text=True) + system_release = result.stdout.replace("Description:", "").strip() + + release_files = [ + "/etc/system-release", + "/etc/redhat-release", + "/etc/lsb-release", + "/etc/gentoo-release", + ] + + if not system_release: + for f in release_files: + system_release = self.catcheck(f) + if system_release: + break + + # This seems more common than LSB these days + if not system_release: + os_var = {} + try: + with open("/etc/os-release", "r", encoding="utf-8") as f: + for line in f: + match = re.match(r"^([\w\d\_]+)=\"?([^\"]*)\"?\n", line) + if match: + os_var[match.group(1)] = match.group(2) + + system_release = os_var.get("NAME", "") + if "VERSION_ID" in os_var: + system_release += " " + os_var["VERSION_ID"] + elif "VERSION" in os_var: + system_release += " " + os_var["VERSION"] + except IOError: + pass + + if not system_release: + system_release = self.catcheck("/etc/issue") + + system_release = system_release.strip() + + return system_release + +class SphinxDependencyChecker(MissingCheckers): + """ + Main class for checking Sphinx documentation build dependencies. + + - Check for missing system packages; + - Check for missing Python modules; + - Check for missing LaTeX packages needed by PDF generation; + - Propose Sphinx install via Python Virtual environment; + - Propose Sphinx install via distro-specific package install. + """ + def __init__(self, args): + """Initialize checker variables""" + + # List of required texlive packages on Fedora and OpenSuse + texlive = { + "amsfonts.sty": "texlive-amsfonts", + "amsmath.sty": "texlive-amsmath", + "amssymb.sty": "texlive-amsfonts", + "amsthm.sty": "texlive-amscls", + "anyfontsize.sty": "texlive-anyfontsize", + "atbegshi.sty": "texlive-oberdiek", + "bm.sty": "texlive-tools", + "capt-of.sty": "texlive-capt-of", + "cmap.sty": "texlive-cmap", + "ctexhook.sty": "texlive-ctex", + "ecrm1000.tfm": "texlive-ec", + "eqparbox.sty": "texlive-eqparbox", + "eu1enc.def": "texlive-euenc", + "fancybox.sty": "texlive-fancybox", + "fancyvrb.sty": "texlive-fancyvrb", + "float.sty": "texlive-float", + "fncychap.sty": "texlive-fncychap", + "footnote.sty": "texlive-mdwtools", + "framed.sty": "texlive-framed", + "luatex85.sty": "texlive-luatex85", + "multirow.sty": "texlive-multirow", + "needspace.sty": "texlive-needspace", + "palatino.sty": "texlive-psnfss", + "parskip.sty": "texlive-parskip", + "polyglossia.sty": "texlive-polyglossia", + "tabulary.sty": "texlive-tabulary", + "threeparttable.sty": "texlive-threeparttable", + "titlesec.sty": "texlive-titlesec", + "ucs.sty": "texlive-ucs", + "upquote.sty": "texlive-upquote", + "wrapfig.sty": "texlive-wrapfig", + } + + super().__init__(args, texlive) + + self.need_pip = False + self.rec_sphinx_upgrade = 0 + + self.system_release = self.get_system_release() + self.activate_cmd = "" + + # Some distros may not have a Sphinx shipped package compatible with + # our minimal requirements + self.package_supported = True + + # Recommend a new python version + self.recommend_python = None + + # Certain hints are meant to be shown only once + self.distro_msg = None + + self.latest_avail_ver = (0, 0, 0) + self.venv_ver = (0, 0, 0) + + prefix = os.environ.get("srctree", ".") + "/" + + self.conf = prefix + "Documentation/conf.py" + self.requirement_file = prefix + "Documentation/sphinx/requirements.txt" + + def get_install_progs(self, progs, cmd, extra=None): + """ + Check for missing dependencies using the provided program mapping. + + The actual distro-specific programs are mapped via progs argument. + """ + install = self.deps.check_missing(progs) + + if self.verbose_warn_install: + self.deps.warn_install() + + if not install: + return + + if cmd: + if self.verbose_warn_install: + msg = "You should run:" + else: + msg = "" + + if extra: + msg += "\n\t" + extra.replace("\n", "\n\t") + + return(msg + "\n\tsudo " + cmd + " " + install) + + return None + + # + # Distro-specific hints methods + # + + def give_debian_hints(self): + """ + Provide package installation hints for Debian-based distros. + """ + progs = { + "Pod::Usage": "perl-modules", + "convert": "imagemagick", + "dot": "graphviz", + "ensurepip": "python3-venv", + "python-sphinx": "python3-sphinx", + "rsvg-convert": "librsvg2-bin", + "virtualenv": "virtualenv", + "xelatex": "texlive-xetex", + "yaml": "python3-yaml", + } + + if self.pdf: + pdf_pkgs = { + "fonts-dejavu": [ + "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", + ], + "fonts-noto-cjk": [ + "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc", + ], + "tex-gyre": [ + "/usr/share/texmf/tex/latex/tex-gyre/tgtermes.sty" + ], + "texlive-fonts-recommended": [ + "/usr/share/texlive/texmf-dist/fonts/tfm/adobe/zapfding/pzdr.tfm", + ], + "texlive-lang-chinese": [ + "/usr/share/texlive/texmf-dist/tex/latex/ctex/ctexhook.sty", + ], + } + + for package, files in pdf_pkgs.items(): + self.check_missing_file(files, package, DepManager.PDF_MANDATORY) + + self.check_program("dvipng", DepManager.PDF_MANDATORY) + + if not self.distro_msg: + self.distro_msg = \ + "Note: ImageMagick is broken on some distros, affecting PDF output. For more details:\n" \ + "\thttps://askubuntu.com/questions/1158894/imagemagick-still-broken-using-with-usr-bin-convert" + + return self.get_install_progs(progs, "apt-get install") + + def give_redhat_hints(self): + """ + Provide package installation hints for RedHat-based distros + (Fedora, RHEL and RHEL-based variants). + """ + progs = { + "Pod::Usage": "perl-Pod-Usage", + "convert": "ImageMagick", + "dot": "graphviz", + "python-sphinx": "python3-sphinx", + "rsvg-convert": "librsvg2-tools", + "virtualenv": "python3-virtualenv", + "xelatex": "texlive-xetex-bin", + "yaml": "python3-pyyaml", + } + + fedora_tex_pkgs = [ + "dejavu-sans-fonts", + "dejavu-sans-mono-fonts", + "dejavu-serif-fonts", + "texlive-collection-fontsrecommended", + "texlive-collection-latex", + "texlive-xecjk", + ] + + fedora = False + rel = None + + match = re.search(r"(release|Linux)\s+(\d+)", self.system_release) + if match: + rel = int(match.group(2)) + + if not rel: + print("Couldn't identify release number") + noto_sans_redhat = None + self.pdf = False + elif re.search("Fedora", self.system_release): + # Fedora 38 and upper use this CJK font + + noto_sans_redhat = "google-noto-sans-cjk-fonts" + fedora = True + else: + # Almalinux, CentOS, RHEL, ... + + # at least up to version 9 (and Fedora < 38), that's the CJK font + noto_sans_redhat = "google-noto-sans-cjk-ttc-fonts" + + progs["virtualenv"] = "python-virtualenv" + + if not rel or rel < 8: + print("ERROR: Distro not supported. Too old?") + return + + # RHEL 8 uses Python 3.6, which is not compatible with + # the build system anymore. Suggest Python 3.11 + if rel == 8: + self.check_program("python3.9", DepManager.SYSTEM_MANDATORY) + progs["python3.9"] = "python39" + progs["yaml"] = "python39-pyyaml" + + self.recommend_python = True + + # There's no python39-sphinx package. Only pip is supported + self.package_supported = False + + if not self.distro_msg: + self.distro_msg = \ + "Note: RHEL-based distros typically require extra repositories.\n" \ + "For most, enabling epel and crb are enough:\n" \ + "\tsudo dnf install -y epel-release\n" \ + "\tsudo dnf config-manager --set-enabled crb\n" \ + "Yet, some may have other required repositories. Those commands could be useful:\n" \ + "\tsudo dnf repolist all\n" \ + "\tsudo dnf repoquery --available --info \n" \ + "\tsudo dnf config-manager --set-enabled '*' # enable all - probably not what you want" + + if self.pdf: + pdf_pkgs = [ + "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/google-noto-sans-cjk-fonts/NotoSansCJK-Regular.ttc", + ] + + self.check_missing_file(pdf_pkgs, noto_sans_redhat, DepManager.PDF_MANDATORY) + + self.check_rpm_missing(fedora_tex_pkgs, DepManager.PDF_MANDATORY) + + self.check_missing_tex(DepManager.PDF_MANDATORY) + + # There's no texlive-ctex on RHEL 8 repositories. This will + # likely affect CJK pdf build only. + if not fedora and rel == 8: + self.deps.del_package("texlive-ctex") + + return self.get_install_progs(progs, "dnf install") + + def give_opensuse_hints(self): + """ + Provide package installation hints for openSUSE-based distros + (Leap and Tumbleweed). + """ + progs = { + "Pod::Usage": "perl-Pod-Usage", + "convert": "ImageMagick", + "dot": "graphviz", + "python-sphinx": "python3-sphinx", + "virtualenv": "python3-virtualenv", + "xelatex": "texlive-xetex-bin texlive-dejavu", + "yaml": "python3-pyyaml", + } + + suse_tex_pkgs = [ + "texlive-babel-english", + "texlive-caption", + "texlive-colortbl", + "texlive-courier", + "texlive-dvips", + "texlive-helvetic", + "texlive-makeindex", + "texlive-metafont", + "texlive-metapost", + "texlive-palatino", + "texlive-preview", + "texlive-times", + "texlive-zapfchan", + "texlive-zapfding", + ] + + progs["latexmk"] = "texlive-latexmk-bin" + + match = re.search(r"(Leap)\s+(\d+).(\d)", self.system_release) + if match: + rel = int(match.group(2)) + + # Leap 15.x uses Python 3.6, which is not compatible with + # the build system anymore. Suggest Python 3.11 + if rel == 15: + if not self.which(self.python_cmd): + self.check_program("python3.11", DepManager.SYSTEM_MANDATORY) + progs["python3.11"] = "python311" + self.recommend_python = True + + progs.update({ + "python-sphinx": "python311-Sphinx python311-Sphinx-latex", + "virtualenv": "python311-virtualenv", + "yaml": "python311-PyYAML", + }) + else: + # Tumbleweed defaults to Python 3.11 + + progs.update({ + "python-sphinx": "python313-Sphinx python313-Sphinx-latex", + "virtualenv": "python313-virtualenv", + "yaml": "python313-PyYAML", + }) + + # FIXME: add support for installing CJK fonts + # + # I tried hard, but was unable to find a way to install + # "Noto Sans CJK SC" on openSUSE + + if self.pdf: + self.check_rpm_missing(suse_tex_pkgs, DepManager.PDF_MANDATORY) + if self.pdf: + self.check_missing_tex() + + return self.get_install_progs(progs, "zypper install --no-recommends") + + def give_mageia_hints(self): + """ + Provide package installation hints for Mageia and OpenMandriva. + """ + progs = { + "Pod::Usage": "perl-Pod-Usage", + "convert": "ImageMagick", + "dot": "graphviz", + "python-sphinx": "python3-sphinx", + "rsvg-convert": "librsvg2", + "virtualenv": "python3-virtualenv", + "xelatex": "texlive", + "yaml": "python3-yaml", + } + + tex_pkgs = [ + "texlive-fontsextra", + "texlive-fonts-asian", + "fonts-ttf-dejavu", + ] + + if re.search(r"OpenMandriva", self.system_release): + packager_cmd = "dnf install" + noto_sans = "noto-sans-cjk-fonts" + tex_pkgs = [ + "texlive-collection-basic", + "texlive-collection-langcjk", + "texlive-collection-fontsextra", + "texlive-collection-fontsrecommended" + ] + + # Tested on OpenMandriva Lx 4.3 + progs["convert"] = "imagemagick" + progs["yaml"] = "python-pyyaml" + progs["python-virtualenv"] = "python-virtualenv" + progs["python-sphinx"] = "python-sphinx" + progs["xelatex"] = "texlive" + + self.check_program("python-virtualenv", DepManager.PYTHON_MANDATORY) + + # On my tests with openMandriva LX 4.0 docker image, upgraded + # to 4.3, python-virtualenv package is broken: it is missing + # ensurepip. Without it, the alternative would be to run: + # python3 -m venv --without-pip ~/sphinx_latest, but running + # pip there won't install sphinx at venv. + # + # Add a note about that. + + if not self.distro_msg: + self.distro_msg = \ + "Notes:\n"\ + "1. for venv, ensurepip could be broken, preventing its install method.\n" \ + "2. at least on OpenMandriva LX 4.3, texlive packages seem broken" + + else: + packager_cmd = "urpmi" + noto_sans = "google-noto-sans-cjk-ttc-fonts" + + progs["latexmk"] = "texlive-collection-basic" + + if self.pdf: + pdf_pkgs = [ + "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/TTF/NotoSans-Regular.ttf", + ] + + self.check_missing_file(pdf_pkgs, noto_sans, DepManager.PDF_MANDATORY) + self.check_rpm_missing(tex_pkgs, DepManager.PDF_MANDATORY) + + return self.get_install_progs(progs, packager_cmd) + + def give_arch_linux_hints(self): + """ + Provide package installation hints for ArchLinux. + """ + progs = { + "convert": "imagemagick", + "dot": "graphviz", + "latexmk": "texlive-core", + "rsvg-convert": "extra/librsvg", + "virtualenv": "python-virtualenv", + "xelatex": "texlive-xetex", + "yaml": "python-yaml", + } + + archlinux_tex_pkgs = [ + "texlive-basic", + "texlive-binextra", + "texlive-core", + "texlive-fontsrecommended", + "texlive-langchinese", + "texlive-langcjk", + "texlive-latexextra", + "ttf-dejavu", + ] + + if self.pdf: + self.check_pacman_missing(archlinux_tex_pkgs, + DepManager.PDF_MANDATORY) + + self.check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc"], + "noto-fonts-cjk", + DepManager.PDF_MANDATORY) + + + return self.get_install_progs(progs, "pacman -S") + + def give_gentoo_hints(self): + """ + Provide package installation hints for Gentoo. + """ + texlive_deps = [ + "dev-texlive/texlive-fontsrecommended", + "dev-texlive/texlive-latexextra", + "dev-texlive/texlive-xetex", + "media-fonts/dejavu", + ] + + progs = { + "convert": "media-gfx/imagemagick", + "dot": "media-gfx/graphviz", + "rsvg-convert": "gnome-base/librsvg", + "virtualenv": "dev-python/virtualenv", + "xelatex": " ".join(texlive_deps), + "yaml": "dev-python/pyyaml", + "python-sphinx": "dev-python/sphinx", + } + + if self.pdf: + pdf_pkgs = { + "media-fonts/dejavu": [ + "/usr/share/fonts/dejavu/DejaVuSans.ttf", + ], + "media-fonts/noto-cjk": [ + "/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf", + "/usr/share/fonts/noto-cjk/NotoSerifCJK-Regular.ttc", + ], + } + for package, files in pdf_pkgs.items(): + self.check_missing_file(files, package, DepManager.PDF_MANDATORY) + + # Handling dependencies is a nightmare, as Gentoo refuses to emerge + # some packages if there's no package.use file describing them. + # To make it worse, compilation flags shall also be present there + # for some packages. If USE is not perfect, error/warning messages + # like those are shown: + # + # !!! The following binary packages have been ignored due to non matching USE: + # + # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_13 qt6 svg + # =media-gfx/graphviz-12.2.1-r1 X pdf python_single_target_python3_12 -python_single_target_python3_13 qt6 svg + # =media-gfx/graphviz-12.2.1-r1 X pdf qt6 svg + # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 qt6 svg + # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 python_single_target_python3_12 -python_single_target_python3_13 qt6 svg + # =media-fonts/noto-cjk-20190416 X + # =app-text/texlive-core-2024-r1 X cjk -xetex + # =app-text/texlive-core-2024-r1 X -xetex + # =app-text/texlive-core-2024-r1 -xetex + # =dev-libs/zziplib-0.13.79-r1 sdl + # + # And will ignore such packages, installing the remaining ones. That + # affects mostly the image extension and PDF generation. + + # Package dependencies and the minimal needed args: + portages = { + "graphviz": "media-gfx/graphviz", + "imagemagick": "media-gfx/imagemagick", + "media-libs": "media-libs/harfbuzz icu", + "media-fonts": "media-fonts/noto-cjk", + "texlive": "app-text/texlive-core xetex", + "zziblib": "dev-libs/zziplib sdl", + } + + extra_cmds = "" + if not self.distro_msg: + self.distro_msg = "Note: Gentoo requires package.use to be adjusted before emerging packages" + + use_base = "/etc/portage/package.use" + files = glob(f"{use_base}/*") + + for fname, portage in portages.items(): + install = False + + while install is False: + if not files: + # No files under package.usage. Install all + install = True + break + + args = portage.split(" ") + + name = args.pop(0) + + cmd = ["grep", "-l", "-E", rf"^{name}\b" ] + files + result = self.run(cmd, stdout=subprocess.PIPE, text=True) + if result.returncode or not result.stdout.strip(): + # File containing portage name not found + install = True + break + + # Ensure that needed USE flags are present + if args: + match_fname = result.stdout.strip() + with open(match_fname, 'r', encoding='utf8', + errors='backslashreplace') as fp: + for line in fp: + for arg in args: + if arg.startswith("-"): + continue + + if not re.search(rf"\s*{arg}\b", line): + # Needed file argument not found + install = True + break + + # Everything looks ok, don't install + break + + # emit a code to setup missing USE + if install: + extra_cmds += (f"sudo su -c 'echo \"{portage}\" > {use_base}/{fname}'\n") + + # Now, we can use emerge and let it respect USE + return self.get_install_progs(progs, + "emerge --ask --changed-use --binpkg-respect-use=y", + extra_cmds) + + def get_install(self): + """ + OS-specific hints logic. Seeks for a hinter. If found, use it to + provide package-manager specific install commands. + + Otherwise, outputs install instructions for the meta-packages. + + Returns a string with the command to be executed to install the + the needed packages, if distro found. Otherwise, return just a + list of packages that require installation. + """ + os_hints = { + re.compile("Red Hat Enterprise Linux"): self.give_redhat_hints, + re.compile("Fedora"): self.give_redhat_hints, + re.compile("AlmaLinux"): self.give_redhat_hints, + re.compile("Amazon Linux"): self.give_redhat_hints, + re.compile("CentOS"): self.give_redhat_hints, + re.compile("openEuler"): self.give_redhat_hints, + re.compile("Oracle Linux Server"): self.give_redhat_hints, + re.compile("Rocky Linux"): self.give_redhat_hints, + re.compile("Springdale Open Enterprise"): self.give_redhat_hints, + + re.compile("Ubuntu"): self.give_debian_hints, + re.compile("Debian"): self.give_debian_hints, + re.compile("Devuan"): self.give_debian_hints, + re.compile("Kali"): self.give_debian_hints, + re.compile("Mint"): self.give_debian_hints, + + re.compile("openSUSE"): self.give_opensuse_hints, + + re.compile("Mageia"): self.give_mageia_hints, + re.compile("OpenMandriva"): self.give_mageia_hints, + + re.compile("Arch Linux"): self.give_arch_linux_hints, + re.compile("Gentoo"): self.give_gentoo_hints, + } + + # If the OS is detected, use per-OS hint logic + for regex, os_hint in os_hints.items(): + if regex.search(self.system_release): + return os_hint() + + # + # Fall-back to generic hint code for other distros + # That's far from ideal, specially for LaTeX dependencies. + # + progs = {"sphinx-build": "sphinx"} + if self.pdf: + self.check_missing_tex() + + self.distro_msg = \ + f"I don't know distro {self.system_release}.\n" \ + "So, I can't provide you a hint with the install procedure.\n" \ + "There are likely missing dependencies." + + return self.get_install_progs(progs, None) + + # + # Common dependencies + # + def deactivate_help(self): + """ + Print a helper message to disable a virtual environment. + """ + + print("\n If you want to exit the virtualenv, you can use:") + print("\tdeactivate") + + def get_virtenv(self): + """ + Give a hint about how to activate an already-existing virtual + environment containing sphinx-build. + + Returns a tuble with (activate_cmd_path, sphinx_version) with + the newest available virtual env. + """ + + cwd = os.getcwd() + + activates = [] + + # Add all sphinx prefixes with possible version numbers + for p in self.virtenv_prefix: + activates += glob(f"{cwd}/{p}[0-9]*/bin/activate") + + activates.sort(reverse=True, key=str.lower) + + # Place sphinx_latest first, if it exists + for p in self.virtenv_prefix: + activates = glob(f"{cwd}/{p}*latest/bin/activate") + activates + + ver = (0, 0, 0) + for f in activates: + # Discard too old Sphinx virtual environments + match = re.search(r"(\d+)\.(\d+)\.(\d+)", f) + if match: + ver = (int(match.group(1)), int(match.group(2)), int(match.group(3))) + + if ver < self.min_version: + continue + + sphinx_cmd = f.replace("activate", "sphinx-build") + if not os.path.isfile(sphinx_cmd): + continue + + ver = self.get_sphinx_version(sphinx_cmd) + + if not ver: + venv_dir = f.replace("/bin/activate", "") + print(f"Warning: virtual environment {venv_dir} is not working.\n" \ + "Python version upgrade? Remove it with:\n\n" \ + "\trm -rf {venv_dir}\n\n") + else: + if self.need_sphinx and ver >= self.min_version: + return (f, ver) + elif parse_version(ver) > self.cur_version: + return (f, ver) + + return ("", ver) + + def recommend_sphinx_upgrade(self): + """ + Check if Sphinx needs to be upgraded. + + Returns a tuple with the higest available Sphinx version if found. + Otherwise, returns None to indicate either that no upgrade is needed + or no venv was found. + """ + + # Avoid running sphinx-builds from venv if cur_version is good + if self.cur_version and self.cur_version >= RECOMMENDED_VERSION: + self.latest_avail_ver = self.cur_version + return None + + # Get the highest version from sphinx_*/bin/sphinx-build and the + # corresponding command to activate the venv/virtenv + self.activate_cmd, self.venv_ver = self.get_virtenv() + + # Store the highest version from Sphinx existing virtualenvs + if self.activate_cmd and self.venv_ver > self.cur_version: + self.latest_avail_ver = self.venv_ver + else: + if self.cur_version: + self.latest_avail_ver = self.cur_version + else: + self.latest_avail_ver = (0, 0, 0) + + # As we don't know package version of Sphinx, and there's no + # virtual environments, don't check if upgrades are needed + if not self.virtualenv: + if not self.latest_avail_ver: + return None + + return self.latest_avail_ver + + # Either there are already a virtual env or a new one should be created + self.need_pip = True + + if not self.latest_avail_ver: + return None + + # Return if the reason is due to an upgrade or not + if self.latest_avail_ver != (0, 0, 0): + if self.latest_avail_ver < RECOMMENDED_VERSION: + self.rec_sphinx_upgrade = 1 + + return self.latest_avail_ver + + def recommend_package(self): + """ + Recommend installing Sphinx as a distro-specific package. + """ + + print("\n2) As a package with:") + + old_need = self.deps.need + old_optional = self.deps.optional + + self.pdf = False + self.deps.optional = 0 + old_verbose = self.verbose_warn_install + self.verbose_warn_install = 0 + + self.deps.clear_deps() + + self.deps.add_package("python-sphinx", DepManager.PYTHON_MANDATORY) + + cmd = self.get_install() + if cmd: + print(cmd) + + self.deps.need = old_need + self.deps.optional = old_optional + self.verbose_warn_install = old_verbose + + def recommend_sphinx_version(self, virtualenv_cmd): + """ + Provide recommendations for installing or upgrading Sphinx based + on current version. + + The logic here is complex, as it have to deal with different versions: + + - minimal supported version; + - minimal PDF version; + - recommended version. + + It also needs to work fine with both distro's package and + venv/virtualenv + """ + + if self.recommend_python: + cur_ver = sys.version_info[:3] + if cur_ver < MIN_PYTHON_VERSION: + print(f"\nPython version {cur_ver} is incompatible with doc build.\n" \ + "Please upgrade it and re-run.\n") + return + + # Version is OK. Nothing to do. + if self.cur_version != (0, 0, 0) and self.cur_version >= RECOMMENDED_VERSION: + return + + if self.latest_avail_ver: + latest_avail_ver = ver_str(self.latest_avail_ver) + + if not self.need_sphinx: + # sphinx-build is present and its version is >= $min_version + + # only recommend enabling a newer virtenv version if makes sense. + if self.latest_avail_ver and self.latest_avail_ver > self.cur_version: + print(f"\nYou may also use the newer Sphinx version {latest_avail_ver} with:") + if f"{self.virtenv_prefix}" in os.getcwd(): + print("\tdeactivate") + print(f"\t. {self.activate_cmd}") + self.deactivate_help() + return + + if self.latest_avail_ver and self.latest_avail_ver >= RECOMMENDED_VERSION: + return + + if not self.virtualenv: + # No sphinx either via package or via virtenv. As we can't + # Compare the versions here, just return, recommending the + # user to install it from the package distro. + if not self.latest_avail_ver or self.latest_avail_ver == (0, 0, 0): + return + + # User doesn't want a virtenv recommendation, but he already + # installed one via virtenv with a newer version. + # So, print commands to enable it + if self.latest_avail_ver > self.cur_version: + print(f"\nYou may also use the Sphinx virtualenv version {latest_avail_ver} with:") + if f"{self.virtenv_prefix}" in os.getcwd(): + print("\tdeactivate") + print(f"\t. {self.activate_cmd}") + self.deactivate_help() + return + print("\n") + else: + if self.need_sphinx: + self.deps.need += 1 + + # Suggest newer versions if current ones are too old + if self.latest_avail_ver and self.latest_avail_ver >= self.min_version: + if self.latest_avail_ver >= RECOMMENDED_VERSION: + print(f"\nNeed to activate Sphinx (version {latest_avail_ver}) on virtualenv with:") + print(f"\t. {self.activate_cmd}") + self.deactivate_help() + return + + # Version is above the minimal required one, but may be + # below the recommended one. So, print warnings/notes + if self.latest_avail_ver < RECOMMENDED_VERSION: + print(f"Warning: It is recommended at least Sphinx version {RECOMMENDED_VERSION}.") + + # At this point, either it needs Sphinx or upgrade is recommended, + # both via pip + + if self.rec_sphinx_upgrade: + if not self.virtualenv: + print("Instead of install/upgrade Python Sphinx pkg, you could use pip/pypi with:\n\n") + else: + print("To upgrade Sphinx, use:\n\n") + else: + print("\nSphinx needs to be installed either:\n1) via pip/pypi with:\n") + + if not virtualenv_cmd: + print(" Currently not possible.\n") + print(" Please upgrade Python to a newer version and run this script again") + else: + print(f"\t{virtualenv_cmd} {self.virtenv_dir}") + print(f"\t. {self.virtenv_dir}/bin/activate") + print(f"\tpip install -r {self.requirement_file}") + self.deactivate_help() + + if self.package_supported: + self.recommend_package() + + print("\n" \ + " Please note that Sphinx currentlys produce false-positive\n" \ + " warnings when the same name is used for more than one type (functions,\n" \ + " structs, enums,...). This is known Sphinx bug. For more details, see:\n" \ + "\thttps://github.com/sphinx-doc/sphinx/pull/8313") + + def check_needs(self): + """ + Main method that checks needed dependencies and provides + recommendations. + """ + self.python_cmd = sys.executable + + # Check if Sphinx is already accessible from current environment + self.check_sphinx(self.conf) + + if self.system_release: + print(f"Detected OS: {self.system_release}.") + else: + print("Unknown OS") + if self.cur_version != (0, 0, 0): + ver = ver_str(self.cur_version) + print(f"Sphinx version: {ver}\n") + + # Check the type of virtual env, depending on Python version + virtualenv_cmd = None + + if sys.version_info < MIN_PYTHON_VERSION: + min_ver = ver_str(MIN_PYTHON_VERSION) + print(f"ERROR: at least python {min_ver} is required to build the kernel docs") + self.need_sphinx = 1 + + self.venv_ver = self.recommend_sphinx_upgrade() + + if self.need_pip: + if sys.version_info < MIN_PYTHON_VERSION: + self.need_pip = False + print("Warning: python version is not supported.") + else: + virtualenv_cmd = f"{self.python_cmd} -m venv" + self.check_python_module("ensurepip") + + # Check for needed programs/tools + self.check_perl_module("Pod::Usage", DepManager.SYSTEM_MANDATORY) + + self.check_program("make", DepManager.SYSTEM_MANDATORY) + self.check_program("which", DepManager.SYSTEM_MANDATORY) + + self.check_program("dot", DepManager.SYSTEM_OPTIONAL) + self.check_program("convert", DepManager.SYSTEM_OPTIONAL) + + self.check_python_module("yaml") + + if self.pdf: + self.check_program("xelatex", DepManager.PDF_MANDATORY) + self.check_program("rsvg-convert", DepManager.PDF_MANDATORY) + self.check_program("latexmk", DepManager.PDF_MANDATORY) + + # Do distro-specific checks and output distro-install commands + cmd = self.get_install() + if cmd: + print(cmd) + + # If distro requires some special instructions, print here. + # Please notice that get_install() needs to be called first. + if self.distro_msg: + print("\n" + self.distro_msg) + + if not self.python_cmd: + if self.need == 1: + sys.exit("Can't build as 1 mandatory dependency is missing") + elif self.need: + sys.exit(f"Can't build as {self.need} mandatory dependencies are missing") + + # Check if sphinx-build is called sphinx-build-3 + if self.need_symlink: + sphinx_path = self.which("sphinx-build-3") + if sphinx_path: + print(f"\tsudo ln -sf {sphinx_path} /usr/bin/sphinx-build\n") + + self.recommend_sphinx_version(virtualenv_cmd) + print("") + + if not self.deps.optional: + print("All optional dependencies are met.") + + if self.deps.need == 1: + sys.exit("Can't build as 1 mandatory dependency is missing") + elif self.deps.need: + sys.exit(f"Can't build as {self.deps.need} mandatory dependencies are missing") + + print("Needed package dependencies are met.") + +DESCRIPTION = """ +Process some flags related to Sphinx installation and documentation build. +""" + + +def main(): + """Main function""" + parser = argparse.ArgumentParser(description=DESCRIPTION) + + parser.add_argument( + "--no-virtualenv", + action="store_false", + dest="virtualenv", + help="Recommend installing Sphinx instead of using a virtualenv", + ) + + parser.add_argument( + "--no-pdf", + action="store_false", + dest="pdf", + help="Don't check for dependencies required to build PDF docs", + ) + + parser.add_argument( + "--version-check", + action="store_true", + dest="version_check", + help="If version is compatible, don't check for missing dependencies", + ) + + args = parser.parse_args() + + checker = SphinxDependencyChecker(args) + + checker.check_python() + checker.check_needs() + +# Call main if not used as module +if __name__ == "__main__": + main() -- cgit v1.2.3 From 7e8a8143ecc3940dbc3664b24b132ec7420d1053 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:53 +0200 Subject: docs: add support to build manpages from kerneldoc output Generating man files currently requires running a separate script. The target also doesn't appear at the docs Makefile. Add support for mandocs at the Makefile, adding the build logic inside sphinx-build-wrapper, updating documentation and dropping the ancillary script. Signed-off-by: Mauro Carvalho Chehab Message-ID: <3d248d724e7f3154f6e3a227e5923d7360201de9.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- Documentation/Makefile | 3 +- Documentation/doc-guide/kernel-doc.rst | 29 ++++++------ Makefile | 7 +-- scripts/split-man.pl | 28 ------------ tools/docs/sphinx-build-wrapper | 81 +++++++++++++++++++++++++++++++--- 5 files changed, 98 insertions(+), 50 deletions(-) delete mode 100755 scripts/split-man.pl (limited to 'scripts') diff --git a/Documentation/Makefile b/Documentation/Makefile index 0dbbd3f50b4f..db802435bd89 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -53,7 +53,7 @@ ifeq ($(HAVE_SPHINX),0) else # HAVE_SPHINX # Common documentation targets -infodocs texinfodocs latexdocs epubdocs xmldocs pdfdocs linkcheckdocs: +mandocs infodocs texinfodocs latexdocs epubdocs xmldocs pdfdocs linkcheckdocs: $(Q)@$(srctree)/tools/docs/sphinx-pre-install --version-check +$(Q)$(PYTHON3) $(BUILD_WRAPPER) $@ \ --sphinxdirs="$(SPHINXDIRS)" --conf="$(SPHINX_CONF)" \ @@ -108,6 +108,7 @@ dochelp: @echo ' htmldocs-redirects - generate HTML redirects for moved pages' @echo ' texinfodocs - Texinfo' @echo ' infodocs - Info' + @echo ' mandocs - Man pages' @echo ' latexdocs - LaTeX' @echo ' pdfdocs - PDF' @echo ' epubdocs - EPUB' diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst index af9697e60165..4370cc8fbcf5 100644 --- a/Documentation/doc-guide/kernel-doc.rst +++ b/Documentation/doc-guide/kernel-doc.rst @@ -579,20 +579,23 @@ source. How to use kernel-doc to generate man pages ------------------------------------------- -If you just want to use kernel-doc to generate man pages you can do this -from the kernel git tree:: +To generate man pages for all files that contain kernel-doc markups, run:: - $ scripts/kernel-doc -man \ - $(git grep -l '/\*\*' -- :^Documentation :^tools) \ - | scripts/split-man.pl /tmp/man + $ make mandocs -Some older versions of git do not support some of the variants of syntax for -path exclusion. One of the following commands may work for those versions:: +Or calling ``script-build-wrapper`` directly:: - $ scripts/kernel-doc -man \ - $(git grep -l '/\*\*' -- . ':!Documentation' ':!tools') \ - | scripts/split-man.pl /tmp/man + $ ./tools/docs/sphinx-build-wrapper mandocs - $ scripts/kernel-doc -man \ - $(git grep -l '/\*\*' -- . ":(exclude)Documentation" ":(exclude)tools") \ - | scripts/split-man.pl /tmp/man +The output will be at ``/man`` directory inside the output directory +(by default: ``Documentation/output``). + +Optionally, it is possible to generate a partial set of man pages by +using SPHINXDIRS: + + $ make SPHINXDIRS=driver-api/media mandocs + +.. note:: + + When SPHINXDIRS={subdir} is used, it will only generate man pages for + the files explicitly inside a ``Documentation/{subdir}/.../*.rst`` file. diff --git a/Makefile b/Makefile index d764afe3a30a..ba1873322ac1 100644 --- a/Makefile +++ b/Makefile @@ -1799,9 +1799,10 @@ $(help-board-dirs): help-%: # Documentation targets # --------------------------------------------------------------------------- -DOC_TARGETS := xmldocs latexdocs pdfdocs htmldocs htmldocs-redirects \ - epubdocs cleandocs linkcheckdocs dochelp refcheckdocs \ - texinfodocs infodocs +DOC_TARGETS := xmldocs latexdocs pdfdocs htmldocs epubdocs cleandocs \ + linkcheckdocs dochelp refcheckdocs texinfodocs infodocs mandocs \ + htmldocs-redirects + PHONY += $(DOC_TARGETS) $(DOC_TARGETS): $(Q)$(MAKE) $(build)=Documentation $@ diff --git a/scripts/split-man.pl b/scripts/split-man.pl deleted file mode 100755 index 96bd99dc977a..000000000000 --- a/scripts/split-man.pl +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 -# -# Author: Mauro Carvalho Chehab -# -# Produce manpages from kernel-doc. -# See Documentation/doc-guide/kernel-doc.rst for instructions - -if ($#ARGV < 0) { - die "where do I put the results?\n"; -} - -mkdir $ARGV[0],0777; -$state = 0; -while () { - if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) { - if ($state == 1) { close OUT } - $state = 1; - $fn = "$ARGV[0]/$1.9"; - print STDERR "Creating $fn\n"; - open OUT, ">$fn" or die "can't open $fn: $!\n"; - print OUT $_; - } elsif ($state != 0) { - print OUT $_; - } -} - -close OUT; diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index 8d1f77c4a880..7a6eb41837e6 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -47,12 +47,14 @@ the newer version. import argparse import locale import os +import re import shlex import shutil import subprocess import sys from concurrent import futures +from glob import glob from lib.python_version import PythonVersion from lib.latex_fonts import LatexFontChecker @@ -77,6 +79,7 @@ TARGETS = { "epubdocs": { "builder": "epub", "out_dir": "epub" }, "texinfodocs": { "builder": "texinfo", "out_dir": "texinfo" }, "infodocs": { "builder": "texinfo", "out_dir": "texinfo" }, + "mandocs": { "builder": "man", "out_dir": "man" }, "latexdocs": { "builder": "latex", "out_dir": "latex" }, "pdfdocs": { "builder": "latex", "out_dir": "latex" }, "xmldocs": { "builder": "xml", "out_dir": "xml" }, @@ -503,6 +506,71 @@ class SphinxBuilder: except subprocess.CalledProcessError as e: sys.exit(f"Error generating info docs: {e}") + def handle_man(self, kerneldoc, docs_dir, src_dir, output_dir): + """ + Create man pages from kernel-doc output + """ + + re_kernel_doc = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)") + re_man = re.compile(r'^\.TH "[^"]*" (\d+) "([^"]*)"') + + if docs_dir == src_dir: + # + # Pick the entire set of kernel-doc markups from the entire tree + # + kdoc_files = set([self.srctree]) + else: + kdoc_files = set() + + for fname in glob(os.path.join(src_dir, "**"), recursive=True): + if os.path.isfile(fname) and fname.endswith(".rst"): + with open(fname, "r", encoding="utf-8") as in_fp: + data = in_fp.read() + + for line in data.split("\n"): + match = re_kernel_doc.match(line) + if match: + if os.path.isfile(match.group(1)): + kdoc_files.add(match.group(1)) + + if not kdoc_files: + sys.exit(f"Directory {src_dir} doesn't contain kernel-doc tags") + + cmd = [ kerneldoc, "-m" ] + sorted(kdoc_files) + try: + if self.verbose: + print(" ".join(cmd)) + + result = subprocess.run(cmd, stdout=subprocess.PIPE, text= True) + + if result.returncode: + print(f"Warning: kernel-doc returned {result.returncode} warnings") + + except (OSError, ValueError, subprocess.SubprocessError) as e: + sys.exit(f"Failed to create man pages for {src_dir}: {repr(e)}") + + fp = None + try: + for line in result.stdout.split("\n"): + match = re_man.match(line) + if not match: + if fp: + fp.write(line + '\n') + continue + + if fp: + fp.close() + + fname = f"{output_dir}/{match.group(2)}.{match.group(1)}" + + if self.verbose: + print(f"Creating {fname}") + fp = open(fname, "w", encoding="utf-8") + fp.write(line + '\n') + finally: + if fp: + fp.close() + def cleandocs(self, builder): # pylint: disable=W0613 """Remove documentation output directory""" shutil.rmtree(self.builddir, ignore_errors=True) @@ -531,7 +599,7 @@ class SphinxBuilder: # Other targets require sphinx-build, so check if it exists # sphinxbuild = shutil.which(self.sphinxbuild, path=self.env["PATH"]) - if not sphinxbuild: + if not sphinxbuild and target != "mandocs": sys.exit(f"Error: {self.sphinxbuild} not found in PATH.\n") if builder == "latex": @@ -619,10 +687,13 @@ class SphinxBuilder: output_dir, ] - try: - self.run_sphinx(sphinxbuild, build_args, env=self.env) - except (OSError, ValueError, subprocess.SubprocessError) as e: - sys.exit(f"Build failed: {repr(e)}") + if target == "mandocs": + self.handle_man(kerneldoc, docs_dir, src_dir, output_dir) + else: + try: + self.run_sphinx(sphinxbuild, build_args, env=self.env) + except (OSError, ValueError, subprocess.SubprocessError) as e: + sys.exit(f"Build failed: {repr(e)}") # # Ensure that each html/epub output will have needed static files -- cgit v1.2.3 From 104e0a682e12e6f52267a945c9ed9cf92a46fa56 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:54 +0200 Subject: tools: kernel-doc: add a see also section at man pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While cross-references are complex, as related ones can be on different files, we can at least correlate the ones that belong to the same file, adding a SEE ALSO section for them. The result is not bad. See for instance: $ tools/docs/sphinx-build-wrapper --sphinxdirs driver-api/media -- mandocs $ man Documentation/output/driver-api/man/edac_pci_add_device.9 edac_pci_add_device(9) Kernel Hacker's Manual edac_pci_add_device(9) NAME edac_pci_add_device - Insert the 'edac_dev' structure into the edac_pci global list and create sysfs entries associated with edac_pci structure. SYNOPSIS int edac_pci_add_device (struct edac_pci_ctl_info *pci , int edac_idx ); ARGUMENTS pci pointer to the edac_device structure to be added to the list edac_idx A unique numeric identifier to be assigned to the RETURN 0 on Success, or an error code on failure SEE ALSO edac_pci_alloc_ctl_info(9), edac_pci_free_ctl_info(9), edac_pci_alloc_index(9), edac_pci_del_device(9), edac_pci_cre‐ ate_generic_ctl(9), edac_pci_release_generic_ctl(9), edac_pci_create_sysfs(9), edac_pci_remove_sysfs(9) August 2025 edac_pci_add_device edac_pci_add_device(9) Signed-off-by: Mauro Carvalho Chehab Message-ID: Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_files.py | 5 ++- scripts/lib/kdoc/kdoc_output.py | 84 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 83 insertions(+), 6 deletions(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py index 9e09b45b02fa..061c033f32da 100644 --- a/scripts/lib/kdoc/kdoc_files.py +++ b/scripts/lib/kdoc/kdoc_files.py @@ -275,7 +275,10 @@ class KernelFiles(): self.config.log.warning("No kernel-doc for file %s", fname) continue - for arg in self.results[fname]: + symbols = self.results[fname] + self.out_style.set_symbols(symbols) + + for arg in symbols: m = self.out_msg(fname, arg.name, arg) if m is None: diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index ea8914537ba0..1eca9a918558 100644 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -215,6 +215,9 @@ class OutputFormat: # Virtual methods to be overridden by inherited classes # At the base class, those do nothing. + def set_symbols(self, symbols): + """Get a list of all symbols from kernel_doc""" + def out_doc(self, fname, name, args): """Outputs a DOC block""" @@ -577,6 +580,7 @@ class ManFormat(OutputFormat): super().__init__() self.modulename = modulename + self.symbols = [] dt = None tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") @@ -593,6 +597,68 @@ class ManFormat(OutputFormat): self.man_date = dt.strftime("%B %Y") + def arg_name(self, args, name): + """ + Return the name that will be used for the man page. + + As we may have the same name on different namespaces, + prepend the data type for all types except functions and typedefs. + + The doc section is special: it uses the modulename. + """ + + dtype = args.type + + if dtype == "doc": + return self.modulename + + if dtype in ["function", "typedef"]: + return name + + return f"{dtype} {name}" + + def set_symbols(self, symbols): + """ + Get a list of all symbols from kernel_doc. + + Man pages will uses it to add a SEE ALSO section with other + symbols at the same file. + """ + self.symbols = symbols + + def out_tail(self, fname, name, args): + """Adds a tail for all man pages""" + + # SEE ALSO section + if len(self.symbols) >= 2: + cur_name = self.arg_name(args, name) + + self.data += f'.SH "SEE ALSO"' + "\n.PP\n" + related = [] + for arg in self.symbols: + out_name = self.arg_name(arg, arg.name) + + if cur_name == out_name: + continue + + related.append(f"\\fB{out_name}\\fR(9)") + + self.data += ",\n".join(related) + "\n" + + # TODO: does it make sense to add other sections? Maybe + # REPORTING ISSUES? LICENSE? + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser. + + Add a tail at the end of man pages output. + """ + super().msg(fname, name, args) + self.out_tail(fname, name, args) + + return self.data + def output_highlight(self, block): """ Outputs a C symbol that may require being highlighted with @@ -618,7 +684,9 @@ class ManFormat(OutputFormat): if not self.check_doc(name, args): return - self.data += f'.TH "{self.modulename}" 9 "{self.modulename}" "{self.man_date}" "API Manual" LINUX' + "\n" + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" for section, text in args.sections.items(): self.data += f'.SH "{section}"' + "\n" @@ -627,7 +695,9 @@ class ManFormat(OutputFormat): def out_function(self, fname, name, args): """output function in man""" - self.data += f'.TH "{name}" 9 "{name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + out_name = self.arg_name(args, name) + + self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -671,7 +741,9 @@ class ManFormat(OutputFormat): self.output_highlight(text) def out_enum(self, fname, name, args): - self.data += f'.TH "{self.modulename}" 9 "enum {name}" "{self.man_date}" "API Manual" LINUX' + "\n" + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"enum {name} \\- {args['purpose']}\n" @@ -703,8 +775,9 @@ class ManFormat(OutputFormat): def out_typedef(self, fname, name, args): module = self.modulename purpose = args.get('purpose') + out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"typedef {name} \\- {purpose}\n" @@ -717,8 +790,9 @@ class ManFormat(OutputFormat): module = self.modulename purpose = args.get('purpose') definition = args.get('definition') + out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{args.type} {name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"{args.type} {name} \\- {purpose}\n" -- cgit v1.2.3 From ade9b9576e2f000fb2ef0ac3bcd26e1167fd813b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:55 +0200 Subject: scripts: kdoc_parser.py: warn about Python version only once When running kernel-doc over multiple documents, it emits one error message per file with is not what we want: $ python3.6 scripts/kernel-doc.py . --none ... Warning: ./include/trace/events/swiotlb.h:0 Python 3.7 or later is required for correct results Warning: ./include/trace/events/iommu.h:0 Python 3.7 or later is required for correct results Warning: ./include/trace/events/sock.h:0 Python 3.7 or later is required for correct results ... Change the logic to warn it only once at the library: $ python3.6 scripts/kernel-doc.py . --none Warning: Python 3.7 or later is required for correct results Warning: ./include/cxl/features.h:0 Python 3.7 or later is required for correct results When running from command line, it warns twice, but that sounds ok. Signed-off-by: Mauro Carvalho Chehab Message-ID: <68e54cf8b1201d1f683aad9bc710a99421910356.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 2376f180b1fa..89d920e0b65c 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -350,6 +350,7 @@ class KernelEntry: self.section = SECTION_DEFAULT self._contents = [] +python_warning = False class KernelDoc: """ @@ -383,9 +384,13 @@ class KernelDoc: # We need Python 3.7 for its "dicts remember the insertion # order" guarantee # - if sys.version_info.major == 3 and sys.version_info.minor < 7: + global python_warning + if (not python_warning and + sys.version_info.major == 3 and sys.version_info.minor < 7): + self.emit_msg(0, 'Python 3.7 or later is required for correct results') + python_warning = True def emit_msg(self, ln, msg, warning=True): """Emit a message""" -- cgit v1.2.3 From c2381e8a6105dcc3eeee93766ba628cf656057f3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 20 Sep 2025 11:40:25 +0200 Subject: scripts: remove sphinx-build-wrapper from scripts/ Commit 8a298579cdfc ("scripts: sphinx-build-wrapper: get rid of uapi/media Makefile") accidentally added scripts/sphinx-build-wrapper, probably due to some rebase issues. The file was added on a separate patch series, at tools/docs, and has other patches on the top of it, so drop this extra version. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- scripts/sphinx-build-wrapper | 719 ------------------------------------------- 1 file changed, 719 deletions(-) delete mode 100755 scripts/sphinx-build-wrapper (limited to 'scripts') diff --git a/scripts/sphinx-build-wrapper b/scripts/sphinx-build-wrapper deleted file mode 100755 index abe8c26ae137..000000000000 --- a/scripts/sphinx-build-wrapper +++ /dev/null @@ -1,719 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright (C) 2025 Mauro Carvalho Chehab -# -# pylint: disable=R0902, R0912, R0913, R0914, R0915, R0917, C0103 -# -# Converted from docs Makefile and parallel-wrapper.sh, both under -# GPLv2, copyrighted since 2008 by the following authors: -# -# Akira Yokosawa -# Arnd Bergmann -# Breno Leitao -# Carlos Bilbao -# Dave Young -# Donald Hunter -# Geert Uytterhoeven -# Jani Nikula -# Jan Stancek -# Jonathan Corbet -# Joshua Clayton -# Kees Cook -# Linus Torvalds -# Magnus Damm -# Masahiro Yamada -# Mauro Carvalho Chehab -# Maxim Cournoyer -# Peter Foley -# Randy Dunlap -# Rob Herring -# Shuah Khan -# Thorsten Blum -# Tomas Winkler - - -""" -Sphinx build wrapper that handles Kernel-specific business rules: - -- it gets the Kernel build environment vars; -- it determines what's the best parallelism; -- it handles SPHINXDIRS - -This tool ensures that MIN_PYTHON_VERSION is satisfied. If version is -below that, it seeks for a new Python version. If found, it re-runs using -the newer version. -""" - -import argparse -import locale -import os -import re -import shlex -import shutil -import subprocess -import sys - -from concurrent import futures -from glob import glob - -LIB_DIR = "lib" -SRC_DIR = os.path.dirname(os.path.realpath(__file__)) - -sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) - -from jobserver import JobserverExec # pylint: disable=C0413 - - -def parse_version(version): - """Convert a major.minor.patch version into a tuple""" - return tuple(int(x) for x in version.split(".")) - -def ver_str(version): - """Returns a version tuple as major.minor.patch""" - - return ".".join([str(x) for x in version]) - -# Minimal supported Python version needed by Sphinx and its extensions -MIN_PYTHON_VERSION = parse_version("3.7") - -# Default value for --venv parameter -VENV_DEFAULT = "sphinx_latest" - -# List of make targets and its corresponding builder and output directory -TARGETS = { - "cleandocs": { - "builder": "clean", - }, - "htmldocs": { - "builder": "html", - }, - "epubdocs": { - "builder": "epub", - "out_dir": "epub", - }, - "texinfodocs": { - "builder": "texinfo", - "out_dir": "texinfo", - }, - "infodocs": { - "builder": "texinfo", - "out_dir": "texinfo", - }, - "latexdocs": { - "builder": "latex", - "out_dir": "latex", - }, - "pdfdocs": { - "builder": "latex", - "out_dir": "latex", - }, - "xmldocs": { - "builder": "xml", - "out_dir": "xml", - }, - "linkcheckdocs": { - "builder": "linkcheck" - }, -} - -# Paper sizes. An empty value will pick the default -PAPER = ["", "a4", "letter"] - -class SphinxBuilder: - """ - Handles a sphinx-build target, adding needed arguments to build - with the Kernel. - """ - - def is_rust_enabled(self): - """Check if rust is enabled at .config""" - config_path = os.path.join(self.srctree, ".config") - if os.path.isfile(config_path): - with open(config_path, "r", encoding="utf-8") as f: - return "CONFIG_RUST=y" in f.read() - return False - - def get_path(self, path, abs_path=False): - """ - Ancillary routine to handle patches the right way, as shell does. - - It first expands "~" and "~user". Then, if patch is not absolute, - join self.srctree. Finally, if requested, convert to abspath. - """ - - path = os.path.expanduser(path) - if not path.startswith("/"): - path = os.path.join(self.srctree, path) - - if abs_path: - return os.path.abspath(path) - - return path - - def __init__(self, venv=None, verbose=False, n_jobs=None, interactive=None): - """Initialize internal variables""" - self.venv = venv - self.verbose = None - - # Normal variables passed from Kernel's makefile - self.kernelversion = os.environ.get("KERNELVERSION", "unknown") - self.kernelrelease = os.environ.get("KERNELRELEASE", "unknown") - self.pdflatex = os.environ.get("PDFLATEX", "xelatex") - - if not interactive: - self.latexopts = os.environ.get("LATEXOPTS", "-interaction=batchmode -no-shell-escape") - else: - self.latexopts = os.environ.get("LATEXOPTS", "") - - if not verbose: - verbose = bool(os.environ.get("KBUILD_VERBOSE", "") != "") - - # Handle SPHINXOPTS evironment - sphinxopts = shlex.split(os.environ.get("SPHINXOPTS", "")) - - # As we handle number of jobs and quiet in separate, we need to pick - # it the same way as sphinx-build would pick, so let's use argparse - # do to the right argument expansion - parser = argparse.ArgumentParser() - parser.add_argument('-j', '--jobs', type=int) - parser.add_argument('-q', '--quiet', type=int) - - # Other sphinx-build arguments go as-is, so place them - # at self.sphinxopts - sphinx_args, self.sphinxopts = parser.parse_known_args(sphinxopts) - if sphinx_args.quiet == True: - self.verbose = False - - if sphinx_args.jobs: - self.n_jobs = sphinx_args.jobs - - # Command line arguments was passed, override SPHINXOPTS - if verbose is not None: - self.verbose = verbose - - self.n_jobs = n_jobs - - # Source tree directory. This needs to be at os.environ, as - # Sphinx extensions and media uAPI makefile needs it - self.srctree = os.environ.get("srctree") - if not self.srctree: - self.srctree = "." - os.environ["srctree"] = self.srctree - - # Now that we can expand srctree, get other directories as well - self.sphinxbuild = os.environ.get("SPHINXBUILD", "sphinx-build") - self.kerneldoc = self.get_path(os.environ.get("KERNELDOC", - "scripts/kernel-doc.py")) - self.obj = os.environ.get("obj", "Documentation") - self.builddir = self.get_path(os.path.join(self.obj, "output"), - abs_path=True) - - # Media uAPI needs it - os.environ["BUILDDIR"] = self.builddir - - # Detect if rust is enabled - self.config_rust = self.is_rust_enabled() - - # Get directory locations for LaTeX build toolchain - self.pdflatex_cmd = shutil.which(self.pdflatex) - self.latexmk_cmd = shutil.which("latexmk") - - self.env = os.environ.copy() - - # If venv parameter is specified, run Sphinx from venv - if venv: - bin_dir = os.path.join(venv, "bin") - if os.path.isfile(os.path.join(bin_dir, "activate")): - # "activate" virtual env - self.env["PATH"] = bin_dir + ":" + self.env["PATH"] - self.env["VIRTUAL_ENV"] = venv - if "PYTHONHOME" in self.env: - del self.env["PYTHONHOME"] - print(f"Setting venv to {venv}") - else: - sys.exit(f"Venv {venv} not found.") - - def run_sphinx(self, sphinx_build, build_args, *args, **pwargs): - """ - Executes sphinx-build using current python3 command and setting - -j parameter if possible to run the build in parallel. - """ - - with JobserverExec() as jobserver: - if jobserver.claim: - n_jobs = str(jobserver.claim) - else: - n_jobs = "auto" # Supported since Sphinx 1.7 - - cmd = [] - - if self.venv: - cmd.append("python") - else: - cmd.append(sys.executable) - - cmd.append(sphinx_build) - - # if present, SPHINXOPTS or command line --jobs overrides default - if self.n_jobs: - n_jobs = str(self.n_jobs) - - if n_jobs: - cmd += [f"-j{n_jobs}"] - - if not self.verbose: - cmd.append("-q") - - cmd += self.sphinxopts - - cmd += build_args - - if self.verbose: - print(" ".join(cmd)) - - rc = subprocess.call(cmd, *args, **pwargs) - - def handle_html(self, css, output_dir): - """ - Extra steps for HTML and epub output. - - For such targets, we need to ensure that CSS will be properly - copied to the output _static directory - """ - - if not css: - return - - css = os.path.expanduser(css) - if not css.startswith("/"): - css = os.path.join(self.srctree, css) - - static_dir = os.path.join(output_dir, "_static") - os.makedirs(static_dir, exist_ok=True) - - try: - shutil.copy2(css, static_dir) - except (OSError, IOError) as e: - print(f"Warning: Failed to copy CSS: {e}", file=sys.stderr) - - def build_pdf_file(self, latex_cmd, from_dir, path): - """Builds a single pdf file using latex_cmd""" - try: - subprocess.run(latex_cmd + [path], - cwd=from_dir, check=True) - - return True - except subprocess.CalledProcessError: - # LaTeX PDF error code is almost useless: it returns - # error codes even when build succeeds but has warnings. - # So, we'll ignore the results - return False - - def pdf_parallel_build(self, tex_suffix, latex_cmd, tex_files, n_jobs): - """Build PDF files in parallel if possible""" - builds = {} - build_failed = False - max_len = 0 - has_tex = False - - # Process files in parallel - with futures.ThreadPoolExecutor(max_workers=n_jobs) as executor: - jobs = {} - - for from_dir, pdf_dir, entry in tex_files: - name = entry.name - - if not name.endswith(tex_suffix): - continue - - name = name[:-len(tex_suffix)] - - max_len = max(max_len, len(name)) - - has_tex = True - - future = executor.submit(self.build_pdf_file, latex_cmd, - from_dir, entry.path) - jobs[future] = (from_dir, name, entry.path) - - for future in futures.as_completed(jobs): - from_dir, name, path = jobs[future] - - pdf_name = name + ".pdf" - pdf_from = os.path.join(from_dir, pdf_name) - - try: - success = future.result() - - if success and os.path.exists(pdf_from): - pdf_to = os.path.join(pdf_dir, pdf_name) - - os.rename(pdf_from, pdf_to) - builds[name] = os.path.relpath(pdf_to, self.builddir) - else: - builds[name] = "FAILED" - build_failed = True - except Exception as e: - builds[name] = f"FAILED ({str(e)})" - build_failed = True - - # Handle case where no .tex files were found - if not has_tex: - name = "Sphinx LaTeX builder" - max_len = max(max_len, len(name)) - builds[name] = "FAILED (no .tex file was generated)" - build_failed = True - - return builds, build_failed, max_len - - def handle_pdf(self, output_dirs): - """ - Extra steps for PDF output. - - As PDF is handled via a LaTeX output, after building the .tex file, - a new build is needed to create the PDF output from the latex - directory. - """ - builds = {} - max_len = 0 - tex_suffix = ".tex" - - # Get all tex files that will be used for PDF build - tex_files = [] - for from_dir in output_dirs: - pdf_dir = os.path.join(from_dir, "../pdf") - os.makedirs(pdf_dir, exist_ok=True) - - if self.latexmk_cmd: - latex_cmd = [self.latexmk_cmd, f"-{self.pdflatex}"] - else: - latex_cmd = [self.pdflatex] - - latex_cmd.extend(shlex.split(self.latexopts)) - - # Get a list of tex files to process - with os.scandir(from_dir) as it: - for entry in it: - if entry.name.endswith(tex_suffix): - tex_files.append((from_dir, pdf_dir, entry)) - - # When using make, this won't be used, as the number of jobs comes - # from POSIX jobserver. So, this covers the case where build comes - # from command line. On such case, serialize by default, except if - # the user explicitly sets the number of jobs. - n_jobs = 1 - - # n_jobs is either an integer or "auto". Only use it if it is a number - if self.n_jobs: - try: - n_jobs = int(self.n_jobs) - except ValueError: - pass - - # When using make, jobserver.claim is the number of jobs that were - # used with "-j" and that aren't used by other make targets - with JobserverExec() as jobserver: - n_jobs = 1 - - # Handle the case when a parameter is passed via command line, - # using it as default, if jobserver doesn't claim anything - if self.n_jobs: - try: - n_jobs = int(self.n_jobs) - except ValueError: - pass - - if jobserver.claim: - n_jobs = jobserver.claim - - # Build files in parallel - builds, build_failed, max_len = self.pdf_parallel_build(tex_suffix, - latex_cmd, - tex_files, - n_jobs) - - msg = "Summary" - msg += "\n" + "=" * len(msg) - print() - print(msg) - - for pdf_name, pdf_file in builds.items(): - print(f"{pdf_name:<{max_len}}: {pdf_file}") - - print() - - # return an error if a PDF file is missing - - if build_failed: - sys.exit(f"PDF build failed: not all PDF files were created.") - else: - print("All PDF files were built.") - - def handle_info(self, output_dirs): - """ - Extra steps for Info output. - - For texinfo generation, an additional make is needed from the - texinfo directory. - """ - - for output_dir in output_dirs: - try: - subprocess.run(["make", "info"], cwd=output_dir, check=True) - except subprocess.CalledProcessError as e: - sys.exit(f"Error generating info docs: {e}") - - def cleandocs(self, builder): - - shutil.rmtree(self.builddir, ignore_errors=True) - - def build(self, target, sphinxdirs=None, conf="conf.py", - theme=None, css=None, paper=None): - """ - Build documentation using Sphinx. This is the core function of this - module. It prepares all arguments required by sphinx-build. - """ - - builder = TARGETS[target]["builder"] - out_dir = TARGETS[target].get("out_dir", "") - - # Cleandocs doesn't require sphinx-build - if target == "cleandocs": - self.cleandocs(builder) - return - - # Other targets require sphinx-build - sphinxbuild = shutil.which(self.sphinxbuild, path=self.env["PATH"]) - if not sphinxbuild: - sys.exit(f"Error: {self.sphinxbuild} not found in PATH.\n") - - if builder == "latex": - if not self.pdflatex_cmd and not self.latexmk_cmd: - sys.exit("Error: pdflatex or latexmk required for PDF generation") - - docs_dir = os.path.abspath(os.path.join(self.srctree, "Documentation")) - - # Prepare base arguments for Sphinx build - kerneldoc = self.kerneldoc - if kerneldoc.startswith(self.srctree): - kerneldoc = os.path.relpath(kerneldoc, self.srctree) - - # Prepare common Sphinx options - args = [ - "-b", builder, - "-c", docs_dir, - ] - - if builder == "latex": - if not paper: - paper = PAPER[1] - - args.extend(["-D", f"latex_elements.papersize={paper}paper"]) - - if self.config_rust: - args.extend(["-t", "rustdoc"]) - - if conf: - self.env["SPHINX_CONF"] = self.get_path(conf, abs_path=True) - - if not sphinxdirs: - sphinxdirs = os.environ.get("SPHINXDIRS", ".") - - # The sphinx-build tool has a bug: internally, it tries to set - # locale with locale.setlocale(locale.LC_ALL, ''). This causes a - # crash if language is not set. Detect and fix it. - try: - locale.setlocale(locale.LC_ALL, '') - except Exception: - self.env["LC_ALL"] = "C" - self.env["LANG"] = "C" - - # sphinxdirs can be a list or a whitespace-separated string - sphinxdirs_list = [] - for sphinxdir in sphinxdirs: - if isinstance(sphinxdir, list): - sphinxdirs_list += sphinxdir - else: - for name in sphinxdir.split(" "): - sphinxdirs_list.append(name) - - # Build each directory - output_dirs = [] - for sphinxdir in sphinxdirs_list: - src_dir = os.path.join(docs_dir, sphinxdir) - doctree_dir = os.path.join(self.builddir, ".doctrees") - output_dir = os.path.join(self.builddir, sphinxdir, out_dir) - - # Make directory names canonical - src_dir = os.path.normpath(src_dir) - doctree_dir = os.path.normpath(doctree_dir) - output_dir = os.path.normpath(output_dir) - - os.makedirs(doctree_dir, exist_ok=True) - os.makedirs(output_dir, exist_ok=True) - - output_dirs.append(output_dir) - - build_args = args + [ - "-d", doctree_dir, - "-D", f"kerneldoc_bin={kerneldoc}", - "-D", f"version={self.kernelversion}", - "-D", f"release={self.kernelrelease}", - "-D", f"kerneldoc_srctree={self.srctree}", - src_dir, - output_dir, - ] - - # Execute sphinx-build - try: - self.run_sphinx(sphinxbuild, build_args, env=self.env) - except Exception as e: - sys.exit(f"Build failed: {e}") - - # Ensure that html/epub will have needed static files - if target in ["htmldocs", "epubdocs"]: - self.handle_html(css, output_dir) - - # PDF and Info require a second build step - if target == "pdfdocs": - self.handle_pdf(output_dirs) - elif target == "infodocs": - self.handle_info(output_dirs) - - @staticmethod - def get_python_version(cmd): - """ - Get python version from a Python binary. As we need to detect if - are out there newer python binaries, we can't rely on sys.release here. - """ - - result = subprocess.run([cmd, "--version"], check=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - version = result.stdout.strip() - - match = re.search(r"(\d+\.\d+\.\d+)", version) - if match: - return parse_version(match.group(1)) - - print(f"Can't parse version {version}") - return (0, 0, 0) - - @staticmethod - def find_python(): - """ - Detect if are out there any python 3.xy version newer than the - current one. - - Note: this routine is limited to up to 2 digits for python3. We - may need to update it one day, hopefully on a distant future. - """ - patterns = [ - "python3.[0-9]", - "python3.[0-9][0-9]", - ] - - # Seek for a python binary newer than MIN_PYTHON_VERSION - for path in os.getenv("PATH", "").split(":"): - for pattern in patterns: - for cmd in glob(os.path.join(path, pattern)): - if os.path.isfile(cmd) and os.access(cmd, os.X_OK): - version = SphinxBuilder.get_python_version(cmd) - if version >= MIN_PYTHON_VERSION: - return cmd - - return None - - @staticmethod - def check_python(): - """ - Check if the current python binary satisfies our minimal requirement - for Sphinx build. If not, re-run with a newer version if found. - """ - cur_ver = sys.version_info[:3] - if cur_ver >= MIN_PYTHON_VERSION: - return - - python_ver = ver_str(cur_ver) - - new_python_cmd = SphinxBuilder.find_python() - if not new_python_cmd: - sys.exit(f"Python version {python_ver} is not supported anymore.") - - # Restart script using the newer version - script_path = os.path.abspath(sys.argv[0]) - args = [new_python_cmd, script_path] + sys.argv[1:] - - print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") - - try: - os.execv(new_python_cmd, args) - except OSError as e: - sys.exit(f"Failed to restart with {new_python_cmd}: {e}") - -def jobs_type(value): - """ - Handle valid values for -j. Accepts Sphinx "-jauto", plus a number - equal or bigger than one. - """ - if value is None: - return None - - if value.lower() == 'auto': - return value.lower() - - try: - if int(value) >= 1: - return value - - raise argparse.ArgumentTypeError(f"Minimum jobs is 1, got {value}") - except ValueError: - raise argparse.ArgumentTypeError(f"Must be 'auto' or positive integer, got {value}") - -def main(): - """ - Main function. The only mandatory argument is the target. If not - specified, the other arguments will use default values if not - specified at os.environ. - """ - parser = argparse.ArgumentParser(description="Kernel documentation builder") - - parser.add_argument("target", choices=list(TARGETS.keys()), - help="Documentation target to build") - parser.add_argument("--sphinxdirs", nargs="+", - help="Specific directories to build") - parser.add_argument("--conf", default="conf.py", - help="Sphinx configuration file") - - parser.add_argument("--theme", help="Sphinx theme to use") - - parser.add_argument("--css", help="Custom CSS file for HTML/EPUB") - - parser.add_argument("--paper", choices=PAPER, default=PAPER[0], - help="Paper size for LaTeX/PDF output") - - parser.add_argument("-v", "--verbose", action='store_true', - help="place build in verbose mode") - - parser.add_argument('-j', '--jobs', type=jobs_type, - help="Sets number of jobs to use with sphinx-build") - - parser.add_argument('-i', '--interactive', action='store_true', - help="Change latex default to run in interactive mode") - - parser.add_argument("-V", "--venv", nargs='?', const=f'{VENV_DEFAULT}', - default=None, - help=f'If used, run Sphinx from a venv dir (default dir: {VENV_DEFAULT})') - - args = parser.parse_args() - - SphinxBuilder.check_python() - - builder = SphinxBuilder(venv=args.venv, verbose=args.verbose, - n_jobs=args.jobs, interactive=args.interactive) - - builder.build(args.target, sphinxdirs=args.sphinxdirs, conf=args.conf, - theme=args.theme, css=args.css, paper=args.paper) - -if __name__ == "__main__": - main() -- cgit v1.2.3 From 2bd22194b26ffbbd9fbb71ffbb608b61e024b563 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 1 Oct 2025 16:13:59 +0200 Subject: kernel-doc: output source file name at SEE ALSO for man pages, it is helpful to know from where the man page were generated. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- scripts/lib/kdoc/kdoc_item.py | 3 ++- scripts/lib/kdoc/kdoc_output.py | 3 ++- scripts/lib/kdoc/kdoc_parser.py | 8 +++++--- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_item.py b/scripts/lib/kdoc/kdoc_item.py index b3b225764550..19805301cb2c 100644 --- a/scripts/lib/kdoc/kdoc_item.py +++ b/scripts/lib/kdoc/kdoc_item.py @@ -5,8 +5,9 @@ # class KdocItem: - def __init__(self, name, type, start_line, **other_stuff): + def __init__(self, name, fname, type, start_line, **other_stuff): self.name = name + self.fname = fname self.type = type self.declaration_start_line = start_line self.sections = {} diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index 1eca9a918558..58f115059e93 100644 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -630,10 +630,11 @@ class ManFormat(OutputFormat): """Adds a tail for all man pages""" # SEE ALSO section + self.data += f'.SH "SEE ALSO"' + "\n.PP\n" + self.data += (f"Kernel file \\fB{args.fname}\\fR\n") if len(self.symbols) >= 2: cur_name = self.arg_name(args, name) - self.data += f'.SH "SEE ALSO"' + "\n.PP\n" related = [] for arg in self.symbols: out_name = self.arg_name(arg, arg.name) diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 89d920e0b65c..6e5c115cbdf3 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -254,8 +254,9 @@ SECTION_DEFAULT = "Description" # default section class KernelEntry: - def __init__(self, config, ln): + def __init__(self, config, fname, ln): self.config = config + self.fname = fname self._contents = [] self.prototype = "" @@ -422,7 +423,8 @@ class KernelDoc: The actual output and output filters will be handled elsewhere """ - item = KdocItem(name, dtype, self.entry.declaration_start_line, **args) + item = KdocItem(name, self.fname, dtype, + self.entry.declaration_start_line, **args) item.warnings = self.entry.warnings # Drop empty sections @@ -445,7 +447,7 @@ class KernelDoc: variables used by the state machine. """ - self.entry = KernelEntry(self.config, ln) + self.entry = KernelEntry(self.config, self.fname, ln) # State flags self.state = state.NORMAL -- cgit v1.2.3 From 567f9c428f99560fe14e647def9f42f5344ebde9 Mon Sep 17 00:00:00 2001 From: John Wang Date: Fri, 28 Mar 2025 15:38:02 +0800 Subject: scripts/faddr2line: Set LANG=C to enforce ASCII output Force tools like readelf to use the POSIX/C locale by exporting LANG=C This ensures ASCII-only output and avoids locale-specific characters(e.g., UTF-8 symbols or translated strings), which could break text processing utilities like sed in the script Signed-off-by: John Wang Signed-off-by: Josh Poimboeuf --- scripts/faddr2line | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'scripts') diff --git a/scripts/faddr2line b/scripts/faddr2line index 1fa6beef9f97..1f364fbb0cd8 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -76,6 +76,10 @@ ADDR2LINE="${UTIL_PREFIX}addr2line${UTIL_SUFFIX}" AWK="awk" GREP="grep" +# Enforce ASCII-only output from tools like readelf +# ensuring sed processes strings correctly. +export LANG=C + command -v ${AWK} >/dev/null 2>&1 || die "${AWK} isn't installed" command -v ${READELF} >/dev/null 2>&1 || die "${READELF} isn't installed" command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed" -- cgit v1.2.3 From 6b4679fcbfdf6f27f8455f9c7050ab6c46c6c5e0 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Sun, 21 Sep 2025 12:03:57 +0200 Subject: scripts/faddr2line: Use /usr/bin/env bash for portability The shebang `#!/bin/bash` assumes a fixed path for the bash interpreter. This path does not exist on some systems, such as NixOS, causing the script to fail. Replace `/bin/bash` with the more portable `#!/usr/bin/env bash`. Signed-off-by: Pankaj Raghav Signed-off-by: Josh Poimboeuf --- scripts/faddr2line | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/faddr2line b/scripts/faddr2line index 1f364fbb0cd8..7746d4ad0bfa 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # SPDX-License-Identifier: GPL-2.0 # # Translate stack dump function offsets. -- cgit v1.2.3 From ff5c0466486ba8d07ab2700380e8fd6d5344b4e9 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Sun, 21 Sep 2025 12:03:58 +0200 Subject: scripts/faddr2line: Fix "Argument list too long" error The run_readelf() function reads the entire output of readelf into a single shell variable. For large object files with extensive debug information, the size of this variable can exceed the system's command-line argument length limit. When this variable is subsequently passed to sed via `echo "${out}"`, it triggers an "Argument list too long" error, causing the script to fail. Fix this by redirecting the output of readelf to a temporary file instead of a variable. The sed commands are then modified to read from this file, avoiding the argument length limitation entirely. Signed-off-by: Pankaj Raghav Signed-off-by: Josh Poimboeuf --- scripts/faddr2line | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'scripts') diff --git a/scripts/faddr2line b/scripts/faddr2line index 7746d4ad0bfa..622875396bcf 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -111,14 +111,19 @@ find_dir_prefix() { run_readelf() { local objfile=$1 - local out=$(${READELF} --file-header --section-headers --symbols --wide $objfile) + local tmpfile + tmpfile=$(mktemp) + + ${READELF} --file-header --section-headers --symbols --wide "$objfile" > "$tmpfile" # This assumes that readelf first prints the file header, then the section headers, then the symbols. # Note: It seems that GNU readelf does not prefix section headers with the "There are X section headers" # line when multiple options are given, so let's also match with the "Section Headers:" line. - ELF_FILEHEADER=$(echo "${out}" | sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/q;p') - ELF_SECHEADERS=$(echo "${out}" | sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/,$p' | sed -n '/Symbol table .* contains [0-9]* entries:/q;p') - ELF_SYMS=$(echo "${out}" | sed -n '/Symbol table .* contains [0-9]* entries:/,$p') + ELF_FILEHEADER=$(sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/q;p' "$tmpfile") + ELF_SECHEADERS=$(sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/,$p' "$tmpfile" | sed -n '/Symbol table .* contains [0-9]* entries:/q;p') + ELF_SYMS=$(sed -n '/Symbol table .* contains [0-9]* entries:/,$p' "$tmpfile") + + rm -f -- "$tmpfile" } check_vmlinux() { -- cgit v1.2.3 From 1ba9f8979426590367406c70c1c821f5b943f993 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:10 -0700 Subject: vmlinux.lds: Unify TEXT_MAIN, DATA_MAIN, and related macros TEXT_MAIN, DATA_MAIN and friends are defined differently depending on whether certain config options enable -ffunction-sections and/or -fdata-sections. There's no technical reason for that beyond voodoo coding. Keeping the separate implementations adds unnecessary complexity, fragments the logic, and increases the risk of subtle bugs. Unify the macros by using the same input section patterns across all configs. This is a prerequisite for the upcoming livepatch klp-build tooling which will manually enable -ffunction-sections and -fdata-sections via KCFLAGS. Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/asm-generic/vmlinux.lds.h | 40 ++++++++++++--------------------------- scripts/module.lds.S | 12 +++++------- 2 files changed, 17 insertions(+), 35 deletions(-) (limited to 'scripts') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a9a2e732a65..5facbc994634 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -87,39 +87,24 @@ #define ALIGN_FUNCTION() . = ALIGN(CONFIG_FUNCTION_ALIGNMENT) /* - * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which - * generates .data.identifier sections, which need to be pulled in with - * .data. We don't want to pull in .data..other sections, which Linux - * has defined. Same for text and bss. + * Support -ffunction-sections by matching .text and .text.*, + * but exclude '.text..*'. * - * With LTO_CLANG, the linker also splits sections by default, so we need - * these macros to combine the sections during the final link. - * - * With AUTOFDO_CLANG and PROPELLER_CLANG, by default, the linker splits - * text sections and regroups functions into subsections. - * - * RODATA_MAIN is not used because existing code already defines .rodata.x - * sections to be brought in with rodata. + * Special .text.* sections that are typically grouped separately, such as + * .text.unlikely or .text.hot, must be matched explicitly before using + * TEXT_MAIN. */ -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ -defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* -#else -#define TEXT_MAIN .text -#endif -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) + +/* + * Support -fdata-sections by matching .data, .data.*, and others, + * but exclude '.data..*'. + */ #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data.rel.* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* -#else -#define DATA_MAIN .data .data.rel .data.rel.local -#define SDATA_MAIN .sdata -#define RODATA_MAIN .rodata -#define BSS_MAIN .bss -#define SBSS_MAIN .sbss -#endif /* * GCC 4.5 and later have a 32 bytes section alignment for structures. @@ -581,9 +566,8 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) * during second ld run in second ld pass when generating System.map * * TEXT_MAIN here will match symbols with a fixed pattern (for example, - * .text.hot or .text.unlikely) if dead code elimination or - * function-section is enabled. Match these symbols first before - * TEXT_MAIN to ensure they are grouped together. + * .text.hot or .text.unlikely). Match those before TEXT_MAIN to ensure + * they get grouped together. * * Also placing .text.hot section at the beginning of a page, this * would help the TLB performance. diff --git a/scripts/module.lds.S b/scripts/module.lds.S index ee79c41059f3..2632c6cb8ebe 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -38,12 +38,10 @@ SECTIONS { __kcfi_traps : { KEEP(*(.kcfi_traps)) } #endif -#ifdef CONFIG_LTO_CLANG - /* - * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and - * -ffunction-sections, which increases the size of the final module. - * Merge the split sections in the final binary. - */ + .text : { + *(.text .text.[0-9a-zA-Z_]*) + } + .bss : { *(.bss .bss.[0-9a-zA-Z_]*) *(.bss..L*) @@ -58,7 +56,7 @@ SECTIONS { *(.rodata .rodata.[0-9a-zA-Z_]*) *(.rodata..L*) } -#endif + MOD_SEPARATE_CODETAG_SECTIONS() } -- cgit v1.2.3 From 6717e8f91db71641cb52855ed14c7900972ed0bc Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:16 -0700 Subject: kbuild: Remove 'kmod_' prefix from __KBUILD_MODNAME In preparation for the objtool klp diff subcommand, remove the arbitrary 'kmod_' prefix from __KBUILD_MODNAME and instead add it explicitly in the __initcall_id() macro. This change supports the standardization of "unique" symbol naming by ensuring the non-unique portion of the name comes before the unique part. That will enable objtool to properly correlate symbols across builds. Cc: Masahiro Yamada Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/init.h | 3 ++- scripts/Makefile.lib | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/include/linux/init.h b/include/linux/init.h index 17c1bc712e23..40331923b9f4 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -200,12 +200,13 @@ extern struct module __this_module; /* Format: ____ */ #define __initcall_id(fn) \ + __PASTE(kmod_, \ __PASTE(__KBUILD_MODNAME, \ __PASTE(__, \ __PASTE(__COUNTER__, \ __PASTE(_, \ __PASTE(__LINE__, \ - __PASTE(_, fn)))))) + __PASTE(_, fn))))))) /* Format: ____ */ #define __initcall_name(prefix, __iid, id) \ diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 1d581ba5df66..b95560266124 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -20,7 +20,7 @@ name-fix-token = $(subst $(comma),_,$(subst -,_,$1)) name-fix = $(call stringify,$(call name-fix-token,$1)) basename_flags = -DKBUILD_BASENAME=$(call name-fix,$(basetarget)) modname_flags = -DKBUILD_MODNAME=$(call name-fix,$(modname)) \ - -D__KBUILD_MODNAME=kmod_$(call name-fix-token,$(modname)) + -D__KBUILD_MODNAME=$(call name-fix-token,$(modname)) modfile_flags = -DKBUILD_MODFILE=$(call stringify,$(modfile)) _c_flags = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), \ -- cgit v1.2.3 From 4109043bff31f95d3da9ace33eb3c1925fd62cbd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:17 -0700 Subject: modpost: Ignore unresolved section bounds symbols In preparation for klp-build livepatch module creation tooling, suppress warnings for unresolved references to linker-generated __start_* and __stop_* section bounds symbols. These symbols are expected to be undefined when modpost runs, as they're created later by the linker. Cc: Masahiro Yamada Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/mod/modpost.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'scripts') diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 47c8aa2a6939..755b842f1f9b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -606,6 +606,11 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname) strstarts(symname, "_savevr_") || strcmp(symname, ".TOC.") == 0) return 1; + + /* ignore linker-created section bounds variables */ + if (strstarts(symname, "__start_") || strstarts(symname, "__stop_")) + return 1; + /* Do not ignore this symbol */ return 0; } -- cgit v1.2.3 From 56754f0f46f6a36ba66e8c1b2878f7a4f1edfe3b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:42 -0700 Subject: objtool: Rename --Werror to --werror The objtool --Werror option name is stylistically inconsistent: halfway between GCC's single-dash capitalized -Werror and objtool's double-dash --lowercase convention, making it unnecessarily hard to remember. Make the 'W' lower case (--werror) for consistency with objtool's other options. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/Makefile.lib | 2 +- scripts/Makefile.vmlinux_o | 2 +- tools/objtool/builtin-check.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index b95560266124..15fee73e9289 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -191,7 +191,7 @@ objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION) += --uaccess objtool-args-$(or $(CONFIG_GCOV_KERNEL),$(CONFIG_KCOV)) += --no-unreachable objtool-args-$(CONFIG_PREFIX_SYMBOLS) += --prefix=$(CONFIG_FUNCTION_PADDING_BYTES) -objtool-args-$(CONFIG_OBJTOOL_WERROR) += --Werror +objtool-args-$(CONFIG_OBJTOOL_WERROR) += --werror objtool-args = $(objtool-args-y) \ $(if $(delay-objtool), --link) \ diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 23c8751285d7..20533cc0b1ee 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -41,7 +41,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION)) ifeq ($(delay-objtool),y) vmlinux-objtool-args-y += $(objtool-args-y) else -vmlinux-objtool-args-$(CONFIG_OBJTOOL_WERROR) += --Werror +vmlinux-objtool-args-$(CONFIG_OBJTOOL_WERROR) += --werror endif vmlinux-objtool-args-$(CONFIG_NOINSTR_VALIDATION) += --noinstr \ diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index fcd4a6517896..2aa28af8fb09 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -101,7 +101,7 @@ static const struct option check_options[] = { OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"), OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"), OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"), - OPT_BOOLEAN(0, "Werror", &opts.werror, "return error on warnings"), + OPT_BOOLEAN(0, "werror", &opts.werror, "return error on warnings"), OPT_END(), }; -- cgit v1.2.3 From dd590d4d57ebeeb826823c288741f2ed20f452af Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:59 -0700 Subject: objtool/klp: Introduce klp diff subcommand for diffing object files Add a new klp diff subcommand which performs a binary diff between two object files and extracts changed functions into a new object which can then be linked into a livepatch module. This builds on concepts from the longstanding out-of-tree kpatch [1] project which began in 2012 and has been used for many years to generate livepatch modules for production kernels. However, this is a complete rewrite which incorporates hard-earned lessons from 12+ years of maintaining kpatch. Key improvements compared to kpatch-build: - Integrated with objtool: Leverages objtool's existing control-flow graph analysis to help detect changed functions. - Works on vmlinux.o: Supports late-linked objects, making it compatible with LTO, IBT, and similar. - Simplified code base: ~3k fewer lines of code. - Upstream: No more out-of-tree #ifdef hacks, far less cruft. - Cleaner internals: Vastly simplified logic for symbol/section/reloc inclusion and special section extraction. - Robust __LINE__ macro handling: Avoids false positive binary diffs caused by the __LINE__ macro by introducing a fix-patch-lines script (coming in a later patch) which injects #line directives into the source .patch to preserve the original line numbers at compile time. Note the end result of this subcommand is not yet functionally complete. Livepatch needs some ELF magic which linkers don't like: - Two relocation sections (.rela*, .klp.rela*) for the same text section. - Use of SHN_LIVEPATCH to mark livepatch symbols. Unfortunately linkers tend to mangle such things. To work around that, klp diff generates a linker-compliant intermediate binary which encodes the relevant KLP section/reloc/symbol metadata. After module linking, a klp post-link step (coming soon) will clean up the mess and convert the linked .ko into a fully compliant livepatch module. Note this subcommand requires the diffed binaries to have been compiled with -ffunction-sections and -fdata-sections, and processed with 'objtool --checksum'. Those constraints will be handled by a klp-build script introduced in a later patch. Without '-ffunction-sections -fdata-sections', reliable object diffing would be infeasible due to toolchain limitations: - For intra-file+intra-section references, the compiler might occasionally generated hard-coded instruction offsets instead of relocations. - Section-symbol-based references can be ambiguous: - Overlapping or zero-length symbols create ambiguity as to which symbol is being referenced. - A reference to the end of a symbol (e.g., checking array bounds) can be misinterpreted as a reference to the next symbol, or vice versa. A potential future alternative to '-ffunction-sections -fdata-sections' would be to introduce a toolchain option that forces symbol-based (non-section) relocations. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- MAINTAINERS | 2 +- include/linux/livepatch.h | 25 +- include/linux/livepatch_external.h | 76 ++ kernel/livepatch/core.c | 4 +- scripts/module.lds.S | 10 +- tools/include/linux/livepatch_external.h | 76 ++ tools/include/linux/string.h | 14 + tools/objtool/Build | 4 +- tools/objtool/Makefile | 3 +- tools/objtool/arch/x86/decode.c | 40 + tools/objtool/builtin-klp.c | 52 + tools/objtool/check.c | 14 - tools/objtool/elf.c | 21 +- tools/objtool/include/objtool/arch.h | 1 + tools/objtool/include/objtool/builtin.h | 2 + tools/objtool/include/objtool/elf.h | 56 +- tools/objtool/include/objtool/klp.h | 31 + tools/objtool/include/objtool/objtool.h | 2 + tools/objtool/include/objtool/util.h | 19 + tools/objtool/klp-diff.c | 1646 ++++++++++++++++++++++++++++++ tools/objtool/objtool.c | 41 +- tools/objtool/sync-check.sh | 1 + tools/objtool/weak.c | 7 + 23 files changed, 2088 insertions(+), 59 deletions(-) create mode 100644 include/linux/livepatch_external.h create mode 100644 tools/include/linux/livepatch_external.h create mode 100644 tools/objtool/builtin-klp.c create mode 100644 tools/objtool/include/objtool/klp.h create mode 100644 tools/objtool/include/objtool/util.h create mode 100644 tools/objtool/klp-diff.c (limited to 'scripts') diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..755e2528f839 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14439,7 +14439,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/livepatching/livepatching.g F: Documentation/ABI/testing/sysfs-kernel-livepatch F: Documentation/livepatch/ F: arch/powerpc/include/asm/livepatch.h -F: include/linux/livepatch.h +F: include/linux/livepatch*.h F: kernel/livepatch/ F: kernel/module/livepatch.c F: samples/livepatch/ diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 51a258c24ff5..772919e8096a 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #if IS_ENABLED(CONFIG_LIVEPATCH) @@ -77,30 +78,6 @@ struct klp_func { bool transition; }; -struct klp_object; - -/** - * struct klp_callbacks - pre/post live-(un)patch callback structure - * @pre_patch: executed before code patching - * @post_patch: executed after code patching - * @pre_unpatch: executed before code unpatching - * @post_unpatch: executed after code unpatching - * @post_unpatch_enabled: flag indicating if post-unpatch callback - * should run - * - * All callbacks are optional. Only the pre-patch callback, if provided, - * will be unconditionally executed. If the parent klp_object fails to - * patch for any reason, including a non-zero error status returned from - * the pre-patch callback, no further callbacks will be executed. - */ -struct klp_callbacks { - int (*pre_patch)(struct klp_object *obj); - void (*post_patch)(struct klp_object *obj); - void (*pre_unpatch)(struct klp_object *obj); - void (*post_unpatch)(struct klp_object *obj); - bool post_unpatch_enabled; -}; - /** * struct klp_object - kernel object structure for live patching * @name: module name (or NULL for vmlinux) diff --git a/include/linux/livepatch_external.h b/include/linux/livepatch_external.h new file mode 100644 index 000000000000..138af19b0f5c --- /dev/null +++ b/include/linux/livepatch_external.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * External livepatch interfaces for patch creation tooling + */ + +#ifndef _LINUX_LIVEPATCH_EXTERNAL_H_ +#define _LINUX_LIVEPATCH_EXTERNAL_H_ + +#include + +#define KLP_RELOC_SEC_PREFIX ".klp.rela." +#define KLP_SYM_PREFIX ".klp.sym." + +#define __KLP_PRE_PATCH_PREFIX __klp_pre_patch_callback_ +#define __KLP_POST_PATCH_PREFIX __klp_post_patch_callback_ +#define __KLP_PRE_UNPATCH_PREFIX __klp_pre_unpatch_callback_ +#define __KLP_POST_UNPATCH_PREFIX __klp_post_unpatch_callback_ + +#define KLP_PRE_PATCH_PREFIX __stringify(__KLP_PRE_PATCH_PREFIX) +#define KLP_POST_PATCH_PREFIX __stringify(__KLP_POST_PATCH_PREFIX) +#define KLP_PRE_UNPATCH_PREFIX __stringify(__KLP_PRE_UNPATCH_PREFIX) +#define KLP_POST_UNPATCH_PREFIX __stringify(__KLP_POST_UNPATCH_PREFIX) + +struct klp_object; + +typedef int (*klp_pre_patch_t)(struct klp_object *obj); +typedef void (*klp_post_patch_t)(struct klp_object *obj); +typedef void (*klp_pre_unpatch_t)(struct klp_object *obj); +typedef void (*klp_post_unpatch_t)(struct klp_object *obj); + +/** + * struct klp_callbacks - pre/post live-(un)patch callback structure + * @pre_patch: executed before code patching + * @post_patch: executed after code patching + * @pre_unpatch: executed before code unpatching + * @post_unpatch: executed after code unpatching + * @post_unpatch_enabled: flag indicating if post-unpatch callback + * should run + * + * All callbacks are optional. Only the pre-patch callback, if provided, + * will be unconditionally executed. If the parent klp_object fails to + * patch for any reason, including a non-zero error status returned from + * the pre-patch callback, no further callbacks will be executed. + */ +struct klp_callbacks { + klp_pre_patch_t pre_patch; + klp_post_patch_t post_patch; + klp_pre_unpatch_t pre_unpatch; + klp_post_unpatch_t post_unpatch; + bool post_unpatch_enabled; +}; + +/* + * 'struct klp_{func,object}_ext' are compact "external" representations of + * 'struct klp_{func,object}'. They are used by objtool for livepatch + * generation. The structs are then read by the livepatch module and converted + * to the real structs before calling klp_enable_patch(). + * + * TODO make these the official API for klp_enable_patch(). That should + * simplify livepatch's interface as well as its data structure lifetime + * management. + */ +struct klp_func_ext { + const char *old_name; + void *new_func; + unsigned long sympos; +}; + +struct klp_object_ext { + const char *name; + struct klp_func_ext *funcs; + struct klp_callbacks callbacks; + unsigned int nr_funcs; +}; + +#endif /* _LINUX_LIVEPATCH_EXTERNAL_H_ */ diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 7e443c2cf7d4..0044a8125013 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -224,7 +224,7 @@ static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, /* Format: .klp.sym.sym_objname.sym_name,sympos */ cnt = sscanf(strtab + sym->st_name, - ".klp.sym.%55[^.].%511[^,],%lu", + KLP_SYM_PREFIX "%55[^.].%511[^,],%lu", sym_objname, sym_name, &sympos); if (cnt != 3) { pr_err("symbol %s has an incorrectly formatted name\n", @@ -303,7 +303,7 @@ static int klp_write_section_relocs(struct module *pmod, Elf_Shdr *sechdrs, * See comment in klp_resolve_symbols() for an explanation * of the selected field width value. */ - cnt = sscanf(shstrtab + sec->sh_name, ".klp.rela.%55[^.]", + cnt = sscanf(shstrtab + sec->sh_name, KLP_RELOC_SEC_PREFIX "%55[^.]", sec_objname); if (cnt != 1) { pr_err("section %s has an incorrectly formatted name\n", diff --git a/scripts/module.lds.S b/scripts/module.lds.S index 2632c6cb8ebe..3037d5e5527c 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -34,8 +34,16 @@ SECTIONS { __patchable_function_entries : { *(__patchable_function_entries) } + __klp_funcs 0: ALIGN(8) { KEEP(*(__klp_funcs)) } + + __klp_objects 0: ALIGN(8) { + __start_klp_objects = .; + KEEP(*(__klp_objects)) + __stop_klp_objects = .; + } + #ifdef CONFIG_ARCH_USES_CFI_TRAPS - __kcfi_traps : { KEEP(*(.kcfi_traps)) } + __kcfi_traps : { KEEP(*(.kcfi_traps)) } #endif .text : { diff --git a/tools/include/linux/livepatch_external.h b/tools/include/linux/livepatch_external.h new file mode 100644 index 000000000000..138af19b0f5c --- /dev/null +++ b/tools/include/linux/livepatch_external.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * External livepatch interfaces for patch creation tooling + */ + +#ifndef _LINUX_LIVEPATCH_EXTERNAL_H_ +#define _LINUX_LIVEPATCH_EXTERNAL_H_ + +#include + +#define KLP_RELOC_SEC_PREFIX ".klp.rela." +#define KLP_SYM_PREFIX ".klp.sym." + +#define __KLP_PRE_PATCH_PREFIX __klp_pre_patch_callback_ +#define __KLP_POST_PATCH_PREFIX __klp_post_patch_callback_ +#define __KLP_PRE_UNPATCH_PREFIX __klp_pre_unpatch_callback_ +#define __KLP_POST_UNPATCH_PREFIX __klp_post_unpatch_callback_ + +#define KLP_PRE_PATCH_PREFIX __stringify(__KLP_PRE_PATCH_PREFIX) +#define KLP_POST_PATCH_PREFIX __stringify(__KLP_POST_PATCH_PREFIX) +#define KLP_PRE_UNPATCH_PREFIX __stringify(__KLP_PRE_UNPATCH_PREFIX) +#define KLP_POST_UNPATCH_PREFIX __stringify(__KLP_POST_UNPATCH_PREFIX) + +struct klp_object; + +typedef int (*klp_pre_patch_t)(struct klp_object *obj); +typedef void (*klp_post_patch_t)(struct klp_object *obj); +typedef void (*klp_pre_unpatch_t)(struct klp_object *obj); +typedef void (*klp_post_unpatch_t)(struct klp_object *obj); + +/** + * struct klp_callbacks - pre/post live-(un)patch callback structure + * @pre_patch: executed before code patching + * @post_patch: executed after code patching + * @pre_unpatch: executed before code unpatching + * @post_unpatch: executed after code unpatching + * @post_unpatch_enabled: flag indicating if post-unpatch callback + * should run + * + * All callbacks are optional. Only the pre-patch callback, if provided, + * will be unconditionally executed. If the parent klp_object fails to + * patch for any reason, including a non-zero error status returned from + * the pre-patch callback, no further callbacks will be executed. + */ +struct klp_callbacks { + klp_pre_patch_t pre_patch; + klp_post_patch_t post_patch; + klp_pre_unpatch_t pre_unpatch; + klp_post_unpatch_t post_unpatch; + bool post_unpatch_enabled; +}; + +/* + * 'struct klp_{func,object}_ext' are compact "external" representations of + * 'struct klp_{func,object}'. They are used by objtool for livepatch + * generation. The structs are then read by the livepatch module and converted + * to the real structs before calling klp_enable_patch(). + * + * TODO make these the official API for klp_enable_patch(). That should + * simplify livepatch's interface as well as its data structure lifetime + * management. + */ +struct klp_func_ext { + const char *old_name; + void *new_func; + unsigned long sympos; +}; + +struct klp_object_ext { + const char *name; + struct klp_func_ext *funcs; + struct klp_callbacks callbacks; + unsigned int nr_funcs; +}; + +#endif /* _LINUX_LIVEPATCH_EXTERNAL_H_ */ diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 8499f509f03e..51ad3cf4fa82 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -44,6 +44,20 @@ static inline bool strstarts(const char *str, const char *prefix) return strncmp(str, prefix, strlen(prefix)) == 0; } +/* + * Checks if a string ends with another. + */ +static inline bool str_ends_with(const char *str, const char *substr) +{ + size_t len = strlen(str); + size_t sublen = strlen(substr); + + if (sublen > len) + return false; + + return !strcmp(str + len - sublen, substr); +} + extern char * __must_check skip_spaces(const char *); extern char *strim(char *); diff --git a/tools/objtool/Build b/tools/objtool/Build index a3cdf8af6635..0b01657671d7 100644 --- a/tools/objtool/Build +++ b/tools/objtool/Build @@ -8,8 +8,8 @@ objtool-y += builtin-check.o objtool-y += elf.o objtool-y += objtool.o -objtool-$(BUILD_ORC) += orc_gen.o -objtool-$(BUILD_ORC) += orc_dump.o +objtool-$(BUILD_ORC) += orc_gen.o orc_dump.o +objtool-$(BUILD_KLP) += builtin-klp.o klp-diff.o objtool-y += libstring.o objtool-y += libctype.o diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 958761c05b7c..48928c9bebef 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -15,13 +15,14 @@ ifeq ($(ARCH_HAS_KLP),y) HAVE_XXHASH = $(shell echo "int main() {}" | \ $(HOSTCC) -xc - -o /dev/null -lxxhash 2> /dev/null && echo y || echo n) ifeq ($(HAVE_XXHASH),y) + BUILD_KLP := y LIBXXHASH_CFLAGS := $(shell $(HOSTPKG_CONFIG) libxxhash --cflags 2>/dev/null) \ -DBUILD_KLP LIBXXHASH_LIBS := $(shell $(HOSTPKG_CONFIG) libxxhash --libs 2>/dev/null || echo -lxxhash) endif endif -export BUILD_ORC +export BUILD_ORC BUILD_KLP ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index b2c320f701f9..5c72beeaa3a7 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -88,6 +88,46 @@ s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc) return phys_to_virt(addend); } +static void scan_for_insn(struct section *sec, unsigned long offset, + unsigned long *insn_off, unsigned int *insn_len) +{ + unsigned long o = 0; + struct insn insn; + + while (1) { + + insn_decode(&insn, sec->data->d_buf + o, sec_size(sec) - o, + INSN_MODE_64); + + if (o + insn.length > offset) { + *insn_off = o; + *insn_len = insn.length; + return; + } + + o += insn.length; + } +} + +u64 arch_adjusted_addend(struct reloc *reloc) +{ + unsigned int type = reloc_type(reloc); + s64 addend = reloc_addend(reloc); + unsigned long insn_off; + unsigned int insn_len; + + if (type == R_X86_64_PLT32) + return addend + 4; + + if (type != R_X86_64_PC32 || !is_text_sec(reloc->sec->base)) + return addend; + + scan_for_insn(reloc->sec->base, reloc_offset(reloc), + &insn_off, &insn_len); + + return addend + insn_off + insn_len - reloc_offset(reloc); +} + unsigned long arch_jump_destination(struct instruction *insn) { return insn->offset + insn->len + insn->immediate; diff --git a/tools/objtool/builtin-klp.c b/tools/objtool/builtin-klp.c new file mode 100644 index 000000000000..9b13dd1182af --- /dev/null +++ b/tools/objtool/builtin-klp.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include +#include +#include + +struct subcmd { + const char *name; + const char *description; + int (*fn)(int, const char **); +}; + +static struct subcmd subcmds[] = { + { "diff", "Generate binary diff of two object files", cmd_klp_diff, }, +}; + +static void cmd_klp_usage(void) +{ + fprintf(stderr, "usage: objtool klp []\n\n"); + fprintf(stderr, "Subcommands:\n"); + + for (int i = 0; i < ARRAY_SIZE(subcmds); i++) { + struct subcmd *cmd = &subcmds[i]; + + fprintf(stderr, " %s\t%s\n", cmd->name, cmd->description); + } + + exit(1); +} + +int cmd_klp(int argc, const char **argv) +{ + argc--; + argv++; + + if (!argc) + cmd_klp_usage(); + + if (argc) { + for (int i = 0; i < ARRAY_SIZE(subcmds); i++) { + struct subcmd *cmd = &subcmds[i]; + + if (!strcmp(cmd->name, argv[0])) + return cmd->fn(argc, argv); + } + } + + cmd_klp_usage(); + return 0; +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0f5278127f37..8d17d930d0c8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -185,20 +185,6 @@ static bool is_sibling_call(struct instruction *insn) return (is_static_jump(insn) && insn_call_dest(insn)); } -/* - * Checks if a string ends with another. - */ -static bool str_ends_with(const char *s, const char *sub) -{ - const int slen = strlen(s); - const int sublen = strlen(sub); - - if (sublen > slen) - return 0; - - return !memcmp(s + slen - sublen, sub, sublen); -} - /* * Checks if a function is a Rust "noreturn" one. */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0119b3b4c554..e1daae0630be 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -288,6 +288,18 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name) return NULL; } +struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *name) +{ + struct symbol *sym; + + elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) { + if (!strcmp(sym->name, name) && !is_local_sym(sym)) + return sym; + } + + return NULL; +} + struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len) { @@ -475,6 +487,8 @@ static int elf_add_symbol(struct elf *elf, struct symbol *sym) else entry = &sym->sec->symbol_list; list_add(&sym->list, entry); + + list_add_tail(&sym->global_list, &elf->symbols); elf_hash_add(symbol, &sym->hash, sym->idx); elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name)); @@ -531,6 +545,9 @@ static int read_symbols(struct elf *elf) ERROR_GLIBC("calloc"); return -1; } + + INIT_LIST_HEAD(&elf->symbols); + for (i = 0; i < symbols_nr; i++) { sym = &elf->symbol_data[i]; @@ -639,7 +656,7 @@ static int mark_group_syms(struct elf *elf) return -1; } - list_for_each_entry(sec, &elf->sections, list) { + for_each_sec(elf, sec) { if (sec->sh.sh_type == SHT_GROUP && sec->sh.sh_link == symtab->idx) { sym = find_symbol_by_index(elf, sec->sh.sh_info); @@ -1224,6 +1241,8 @@ struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name) return NULL; } + INIT_LIST_HEAD(&elf->symbols); + if (!elf_alloc_hash(section, 1000) || !elf_alloc_hash(section_name, 1000) || !elf_alloc_hash(symbol, 10000) || diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index a4502947307a..d89f8b5ec14e 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -84,6 +84,7 @@ bool arch_callee_saved_reg(unsigned char reg); unsigned long arch_jump_destination(struct instruction *insn); s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc); +u64 arch_adjusted_addend(struct reloc *reloc); const char *arch_nop_insn(int len); const char *arch_ret_insn(int len); diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index cee9fc031877..bb0b25eb08ba 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -53,4 +53,6 @@ int objtool_run(int argc, const char **argv); int make_backup(void); +int cmd_klp(int argc, const char **argv); + #endif /* _BUILTIN_H */ diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index a1f1762f89c4..e2cd817fca52 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -18,6 +18,7 @@ #include #include +#define SEC_NAME_LEN 1024 #define SYM_NAME_LEN 512 #define bswap_if_needed(elf, val) __bswap_if_needed(&elf->ehdr, val) @@ -53,10 +54,12 @@ struct section { bool _changed, text, rodata, noinstr, init, truncate; struct reloc *relocs; unsigned long nr_alloc_relocs; + struct section *twin; }; struct symbol { struct list_head list; + struct list_head global_list; struct rb_node node; struct elf_hash_node hash; struct elf_hash_node name_hash; @@ -83,10 +86,13 @@ struct symbol { u8 cold : 1; u8 prefix : 1; u8 debug_checksum : 1; + u8 changed : 1; + u8 included : 1; struct list_head pv_target; struct reloc *relocs; struct section *group_sec; struct checksum csum; + struct symbol *twin, *clone; }; struct reloc { @@ -104,6 +110,7 @@ struct elf { const char *name, *tmp_name; unsigned int num_files; struct list_head sections; + struct list_head symbols; unsigned long num_relocs; int symbol_bits; @@ -179,6 +186,7 @@ struct section *find_section_by_name(const struct elf *elf, const char *name); struct symbol *find_func_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_name(const struct elf *elf, const char *name); +struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *name); struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset); int find_symbol_hole_containing(const struct section *sec, unsigned long offset); struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset); @@ -448,22 +456,48 @@ static inline void set_sym_next_reloc(struct reloc *reloc, struct reloc *next) #define sec_for_each_sym(sec, sym) \ list_for_each_entry(sym, &sec->symbol_list, list) +#define sec_prev_sym(sym) \ + sym->sec && sym->list.prev != &sym->sec->symbol_list ? \ + list_prev_entry(sym, list) : NULL + #define for_each_sym(elf, sym) \ - for (struct section *__sec, *__fake = (struct section *)1; \ - __fake; __fake = NULL) \ - for_each_sec(elf, __sec) \ - sec_for_each_sym(__sec, sym) + list_for_each_entry(sym, &elf->symbols, global_list) + +#define for_each_sym_continue(elf, sym) \ + list_for_each_entry_continue(sym, &elf->symbols, global_list) + +#define rsec_next_reloc(rsec, reloc) \ + reloc_idx(reloc) < sec_num_entries(rsec) - 1 ? reloc + 1 : NULL #define for_each_reloc(rsec, reloc) \ - for (int __i = 0, __fake = 1; __fake; __fake = 0) \ - for (reloc = rsec->relocs; \ - __i < sec_num_entries(rsec); \ - __i++, reloc++) + for (reloc = rsec->relocs; reloc; reloc = rsec_next_reloc(rsec, reloc)) #define for_each_reloc_from(rsec, reloc) \ - for (int __i = reloc_idx(reloc); \ - __i < sec_num_entries(rsec); \ - __i++, reloc++) + for (; reloc; reloc = rsec_next_reloc(rsec, reloc)) + +#define for_each_reloc_continue(rsec, reloc) \ + for (reloc = rsec_next_reloc(rsec, reloc); reloc; \ + reloc = rsec_next_reloc(rsec, reloc)) + +#define sym_for_each_reloc(elf, sym, reloc) \ + for (reloc = find_reloc_by_dest_range(elf, sym->sec, \ + sym->offset, sym->len); \ + reloc && reloc_offset(reloc) < sym->offset + sym->len; \ + reloc = rsec_next_reloc(sym->sec->rsec, reloc)) + +static inline struct symbol *get_func_prefix(struct symbol *func) +{ + struct symbol *prev; + + if (!is_func_sym(func)) + return NULL; + + prev = sec_prev_sym(func); + if (prev && is_prefix_func(prev)) + return prev; + + return NULL; +} #define OFFSET_STRIDE_BITS 4 #define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS) diff --git a/tools/objtool/include/objtool/klp.h b/tools/objtool/include/objtool/klp.h new file mode 100644 index 000000000000..07928fac059b --- /dev/null +++ b/tools/objtool/include/objtool/klp.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _OBJTOOL_KLP_H +#define _OBJTOOL_KLP_H + +/* + * __klp_objects and __klp_funcs are created by klp diff and used by the patch + * module init code to build the klp_patch, klp_object and klp_func structs + * needed by the livepatch API. + */ +#define KLP_OBJECTS_SEC "__klp_objects" +#define KLP_FUNCS_SEC "__klp_funcs" + +/* + * __klp_relocs is an intermediate section which are created by klp diff and + * converted into KLP symbols/relas by "objtool klp post-link". This is needed + * to work around the linker, which doesn't preserve SHN_LIVEPATCH or + * SHF_RELA_LIVEPATCH, nor does it support having two RELA sections for a + * single PROGBITS section. + */ +#define KLP_RELOCS_SEC "__klp_relocs" +#define KLP_STRINGS_SEC ".rodata.klp.str1.1" + +struct klp_reloc { + void *offset; + void *sym; + u32 type; +}; + +int cmd_klp_diff(int argc, const char **argv); + +#endif /* _OBJTOOL_KLP_H */ diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index c0dc86a78ff6..7f70b41d1b8d 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -39,6 +39,8 @@ struct objtool_file { struct pv_state *pv_ops; }; +char *top_level_dir(const char *file); + struct objtool_file *objtool_open_read(const char *_objname); int objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func); diff --git a/tools/objtool/include/objtool/util.h b/tools/objtool/include/objtool/util.h new file mode 100644 index 000000000000..a0180b312f73 --- /dev/null +++ b/tools/objtool/include/objtool/util.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _UTIL_H +#define _UTIL_H + +#include + +#define snprintf_check(str, size, format, args...) \ +({ \ + int __ret = snprintf(str, size, format, args); \ + if (__ret < 0) \ + ERROR_GLIBC("snprintf"); \ + else if (__ret >= size) \ + ERROR("snprintf() failed for '" format "'", args); \ + else \ + __ret = 0; \ + __ret; \ +}) + +#endif /* _UTIL_H */ diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c new file mode 100644 index 000000000000..0d69b621a26c --- /dev/null +++ b/tools/objtool/klp-diff.c @@ -0,0 +1,1646 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#define _GNU_SOURCE /* memmem() */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) + +struct elfs { + struct elf *orig, *patched, *out; + const char *modname; +}; + +struct export { + struct hlist_node hash; + char *mod, *sym; +}; + +static const char * const klp_diff_usage[] = { + "objtool klp diff [] ", + NULL, +}; + +static const struct option klp_diff_options[] = { + OPT_END(), +}; + +static DEFINE_HASHTABLE(exports, 15); + +static inline u32 str_hash(const char *str) +{ + return jhash(str, strlen(str), 0); +} + +static int read_exports(void) +{ + const char *symvers = "Module.symvers"; + char line[1024], *path = NULL; + unsigned int line_num = 1; + FILE *file; + + file = fopen(symvers, "r"); + if (!file) { + path = top_level_dir(symvers); + if (!path) { + ERROR("can't open '%s', \"objtool diff\" should be run from the kernel tree", symvers); + return -1; + } + + file = fopen(path, "r"); + if (!file) { + ERROR_GLIBC("fopen"); + return -1; + } + } + + while (fgets(line, 1024, file)) { + char *sym, *mod, *type; + struct export *export; + + sym = strchr(line, '\t'); + if (!sym) { + ERROR("malformed Module.symvers (sym) at line %d", line_num); + return -1; + } + + *sym++ = '\0'; + + mod = strchr(sym, '\t'); + if (!mod) { + ERROR("malformed Module.symvers (mod) at line %d", line_num); + return -1; + } + + *mod++ = '\0'; + + type = strchr(mod, '\t'); + if (!type) { + ERROR("malformed Module.symvers (type) at line %d", line_num); + return -1; + } + + *type++ = '\0'; + + if (*sym == '\0' || *mod == '\0') { + ERROR("malformed Module.symvers at line %d", line_num); + return -1; + } + + export = calloc(1, sizeof(*export)); + if (!export) { + ERROR_GLIBC("calloc"); + return -1; + } + + export->mod = strdup(mod); + if (!export->mod) { + ERROR_GLIBC("strdup"); + return -1; + } + + export->sym = strdup(sym); + if (!export->sym) { + ERROR_GLIBC("strdup"); + return -1; + } + + hash_add(exports, &export->hash, str_hash(sym)); + } + + free(path); + fclose(file); + + return 0; +} + +static int read_sym_checksums(struct elf *elf) +{ + struct section *sec; + + sec = find_section_by_name(elf, ".discard.sym_checksum"); + if (!sec) { + ERROR("'%s' missing .discard.sym_checksum section, file not processed by 'objtool --checksum'?", + elf->name); + return -1; + } + + if (!sec->rsec) { + ERROR("missing reloc section for .discard.sym_checksum"); + return -1; + } + + if (sec_size(sec) % sizeof(struct sym_checksum)) { + ERROR("struct sym_checksum size mismatch"); + return -1; + } + + for (int i = 0; i < sec_size(sec) / sizeof(struct sym_checksum); i++) { + struct sym_checksum *sym_checksum; + struct reloc *reloc; + struct symbol *sym; + + sym_checksum = (struct sym_checksum *)sec->data->d_buf + i; + + reloc = find_reloc_by_dest(elf, sec, i * sizeof(*sym_checksum)); + if (!reloc) { + ERROR("can't find reloc for sym_checksum[%d]", i); + return -1; + } + + sym = reloc->sym; + + if (is_sec_sym(sym)) { + ERROR("not sure how to handle section %s", sym->name); + return -1; + } + + if (is_func_sym(sym)) + sym->csum.checksum = sym_checksum->checksum; + } + + return 0; +} + +static struct symbol *first_file_symbol(struct elf *elf) +{ + struct symbol *sym; + + for_each_sym(elf, sym) { + if (is_file_sym(sym)) + return sym; + } + + return NULL; +} + +static struct symbol *next_file_symbol(struct elf *elf, struct symbol *sym) +{ + for_each_sym_continue(elf, sym) { + if (is_file_sym(sym)) + return sym; + } + + return NULL; +} + +/* + * Certain static local variables should never be correlated. They will be + * used in place rather than referencing the originals. + */ +static bool is_uncorrelated_static_local(struct symbol *sym) +{ + static const char * const vars[] = { + "__key.", + "__warned.", + "__already_done.", + "__func__.", + "_rs.", + "descriptor.", + "CSWTCH.", + }; + + if (!is_object_sym(sym) || !is_local_sym(sym)) + return false; + + if (!strcmp(sym->sec->name, ".data.once")) + return true; + + for (int i = 0; i < ARRAY_SIZE(vars); i++) { + if (strstarts(sym->name, vars[i])) + return true; + } + + return false; +} + +/* + * Clang emits several useless .Ltmp_* code labels. + */ +static bool is_clang_tmp_label(struct symbol *sym) +{ + return sym->type == STT_NOTYPE && + is_text_sec(sym->sec) && + strstarts(sym->name, ".Ltmp") && + isdigit(sym->name[5]); +} + +static bool is_special_section(struct section *sec) +{ + static const char * const specials[] = { + ".altinstructions", + ".smp_locks", + "__bug_table", + "__ex_table", + "__jump_table", + "__mcount_loc", + + /* + * Extract .static_call_sites here to inherit non-module + * preferential treatment. The later static call processing + * during klp module build will be skipped when it sees this + * section already exists. + */ + ".static_call_sites", + }; + + static const char * const non_special_discards[] = { + ".discard.addressable", + ".discard.sym_checksum", + }; + + if (is_text_sec(sec)) + return false; + + for (int i = 0; i < ARRAY_SIZE(specials); i++) { + if (!strcmp(sec->name, specials[i])) + return true; + } + + /* Most .discard data sections are special */ + for (int i = 0; i < ARRAY_SIZE(non_special_discards); i++) { + if (!strcmp(sec->name, non_special_discards[i])) + return false; + } + + return strstarts(sec->name, ".discard."); +} + +/* + * These sections are referenced by special sections but aren't considered + * special sections themselves. + */ +static bool is_special_section_aux(struct section *sec) +{ + static const char * const specials_aux[] = { + ".altinstr_replacement", + ".altinstr_aux", + }; + + for (int i = 0; i < ARRAY_SIZE(specials_aux); i++) { + if (!strcmp(sec->name, specials_aux[i])) + return true; + } + + return false; +} + +/* + * These symbols should never be correlated, so their local patched versions + * are used instead of linking to the originals. + */ +static bool dont_correlate(struct symbol *sym) +{ + return is_file_sym(sym) || + is_null_sym(sym) || + is_sec_sym(sym) || + is_prefix_func(sym) || + is_uncorrelated_static_local(sym) || + is_clang_tmp_label(sym) || + is_string_sec(sym->sec) || + is_special_section(sym->sec) || + is_special_section_aux(sym->sec) || + strstarts(sym->name, "__initcall__"); +} + +/* + * For each symbol in the original kernel, find its corresponding "twin" in the + * patched kernel. + */ +static int correlate_symbols(struct elfs *e) +{ + struct symbol *file1_sym, *file2_sym; + struct symbol *sym1, *sym2; + + /* Correlate locals */ + for (file1_sym = first_file_symbol(e->orig), + file2_sym = first_file_symbol(e->patched); ; + file1_sym = next_file_symbol(e->orig, file1_sym), + file2_sym = next_file_symbol(e->patched, file2_sym)) { + + if (!file1_sym && file2_sym) { + ERROR("FILE symbol mismatch: NULL != %s", file2_sym->name); + return -1; + } + + if (file1_sym && !file2_sym) { + ERROR("FILE symbol mismatch: %s != NULL", file1_sym->name); + return -1; + } + + if (!file1_sym) + break; + + if (strcmp(file1_sym->name, file2_sym->name)) { + ERROR("FILE symbol mismatch: %s != %s", file1_sym->name, file2_sym->name); + return -1; + } + + file1_sym->twin = file2_sym; + file2_sym->twin = file1_sym; + + sym1 = file1_sym; + + for_each_sym_continue(e->orig, sym1) { + if (is_file_sym(sym1) || !is_local_sym(sym1)) + break; + + if (dont_correlate(sym1)) + continue; + + sym2 = file2_sym; + for_each_sym_continue(e->patched, sym2) { + if (is_file_sym(sym2) || !is_local_sym(sym2)) + break; + + if (sym2->twin || dont_correlate(sym2)) + continue; + + if (strcmp(sym1->demangled_name, sym2->demangled_name)) + continue; + + sym1->twin = sym2; + sym2->twin = sym1; + break; + } + } + } + + /* Correlate globals */ + for_each_sym(e->orig, sym1) { + if (sym1->bind == STB_LOCAL) + continue; + + sym2 = find_global_symbol_by_name(e->patched, sym1->name); + + if (sym2 && !sym2->twin && !strcmp(sym1->name, sym2->name)) { + sym1->twin = sym2; + sym2->twin = sym1; + } + } + + for_each_sym(e->orig, sym1) { + if (sym1->twin || dont_correlate(sym1)) + continue; + WARN("no correlation: %s", sym1->name); + } + + return 0; +} + +/* "sympos" is used by livepatch to disambiguate duplicate symbol names */ +static unsigned long find_sympos(struct elf *elf, struct symbol *sym) +{ + bool vmlinux = str_ends_with(objname, "vmlinux.o"); + unsigned long sympos = 0, nr_matches = 0; + bool has_dup = false; + struct symbol *s; + + if (sym->bind != STB_LOCAL) + return 0; + + if (vmlinux && sym->type == STT_FUNC) { + /* + * HACK: Unfortunately, symbol ordering can differ between + * vmlinux.o and vmlinux due to the linker script emitting + * .text.unlikely* before .text*. Count .text.unlikely* first. + * + * TODO: Disambiguate symbols more reliably (checksums?) + */ + for_each_sym(elf, s) { + if (strstarts(s->sec->name, ".text.unlikely") && + !strcmp(s->name, sym->name)) { + nr_matches++; + if (s == sym) + sympos = nr_matches; + else + has_dup = true; + } + } + for_each_sym(elf, s) { + if (!strstarts(s->sec->name, ".text.unlikely") && + !strcmp(s->name, sym->name)) { + nr_matches++; + if (s == sym) + sympos = nr_matches; + else + has_dup = true; + } + } + } else { + for_each_sym(elf, s) { + if (!strcmp(s->name, sym->name)) { + nr_matches++; + if (s == sym) + sympos = nr_matches; + else + has_dup = true; + } + } + } + + if (!sympos) { + ERROR("can't find sympos for %s", sym->name); + return ULONG_MAX; + } + + return has_dup ? sympos : 0; +} + +static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym); + +static struct symbol *__clone_symbol(struct elf *elf, struct symbol *patched_sym, + bool data_too) +{ + struct section *out_sec = NULL; + unsigned long offset = 0; + struct symbol *out_sym; + + if (data_too && !is_undef_sym(patched_sym)) { + struct section *patched_sec = patched_sym->sec; + + out_sec = find_section_by_name(elf, patched_sec->name); + if (!out_sec) { + out_sec = elf_create_section(elf, patched_sec->name, 0, + patched_sec->sh.sh_entsize, + patched_sec->sh.sh_type, + patched_sec->sh.sh_addralign, + patched_sec->sh.sh_flags); + if (!out_sec) + return NULL; + } + + if (is_string_sec(patched_sym->sec)) { + out_sym = elf_create_section_symbol(elf, out_sec); + if (!out_sym) + return NULL; + + goto sym_created; + } + + if (!is_sec_sym(patched_sym)) + offset = sec_size(out_sec); + + if (patched_sym->len || is_sec_sym(patched_sym)) { + void *data = NULL; + size_t size; + + /* bss doesn't have data */ + if (patched_sym->sec->data->d_buf) + data = patched_sym->sec->data->d_buf + patched_sym->offset; + + if (is_sec_sym(patched_sym)) + size = sec_size(patched_sym->sec); + else + size = patched_sym->len; + + if (!elf_add_data(elf, out_sec, data, size)) + return NULL; + } + } + + out_sym = elf_create_symbol(elf, patched_sym->name, out_sec, + patched_sym->bind, patched_sym->type, + offset, patched_sym->len); + if (!out_sym) + return NULL; + +sym_created: + patched_sym->clone = out_sym; + out_sym->clone = patched_sym; + + return out_sym; +} + +/* + * Copy a symbol to the output object, optionally including its data and + * relocations. + */ +static struct symbol *clone_symbol(struct elfs *e, struct symbol *patched_sym, + bool data_too) +{ + struct symbol *pfx; + + if (patched_sym->clone) + return patched_sym->clone; + + /* Make sure the prefix gets cloned first */ + if (is_func_sym(patched_sym) && data_too) { + pfx = get_func_prefix(patched_sym); + if (pfx) + clone_symbol(e, pfx, true); + } + + if (!__clone_symbol(e->out, patched_sym, data_too)) + return NULL; + + if (data_too && clone_sym_relocs(e, patched_sym)) + return NULL; + + return patched_sym->clone; +} + +static void mark_included_function(struct symbol *func) +{ + struct symbol *pfx; + + func->included = 1; + + /* Include prefix function */ + pfx = get_func_prefix(func); + if (pfx) + pfx->included = 1; + + /* Make sure .cold parent+child always stay together */ + if (func->cfunc && func->cfunc != func) + func->cfunc->included = 1; + if (func->pfunc && func->pfunc != func) + func->pfunc->included = 1; +} + +/* + * Copy all changed functions (and their dependencies) from the patched object + * to the output object. + */ +static int mark_changed_functions(struct elfs *e) +{ + struct symbol *sym_orig, *patched_sym; + bool changed = false; + + /* Find changed functions */ + for_each_sym(e->orig, sym_orig) { + if (!is_func_sym(sym_orig) || is_prefix_func(sym_orig)) + continue; + + patched_sym = sym_orig->twin; + if (!patched_sym) + continue; + + if (sym_orig->csum.checksum != patched_sym->csum.checksum) { + patched_sym->changed = 1; + mark_included_function(patched_sym); + changed = true; + } + } + + /* Find added functions and print them */ + for_each_sym(e->patched, patched_sym) { + if (!is_func_sym(patched_sym) || is_prefix_func(patched_sym)) + continue; + + if (!patched_sym->twin) { + printf("%s: new function: %s\n", objname, patched_sym->name); + mark_included_function(patched_sym); + changed = true; + } + } + + /* Print changed functions */ + for_each_sym(e->patched, patched_sym) { + if (patched_sym->changed) + printf("%s: changed function: %s\n", objname, patched_sym->name); + } + + return !changed ? -1 : 0; +} + +static int clone_included_functions(struct elfs *e) +{ + struct symbol *patched_sym; + + for_each_sym(e->patched, patched_sym) { + if (patched_sym->included) { + if (!clone_symbol(e, patched_sym, true)) + return -1; + } + } + + return 0; +} + +/* + * Determine whether a relocation should reference the section rather than the + * underlying symbol. + */ +static bool section_reference_needed(struct section *sec) +{ + /* + * String symbols are zero-length and uncorrelated. It's easier to + * deal with them as section symbols. + */ + if (is_string_sec(sec)) + return true; + + /* + * .rodata has mostly anonymous data so there's no way to determine the + * length of a needed reference. just copy the whole section if needed. + */ + if (strstarts(sec->name, ".rodata")) + return true; + + /* UBSAN anonymous data */ + if (strstarts(sec->name, ".data..Lubsan") || /* GCC */ + strstarts(sec->name, ".data..L__unnamed_")) /* Clang */ + return true; + + return false; +} + +static bool is_reloc_allowed(struct reloc *reloc) +{ + return section_reference_needed(reloc->sym->sec) == is_sec_sym(reloc->sym); +} + +static struct export *find_export(struct symbol *sym) +{ + struct export *export; + + hash_for_each_possible(exports, export, hash, str_hash(sym->name)) { + if (!strcmp(export->sym, sym->name)) + return export; + } + + return NULL; +} + +static const char *__find_modname(struct elfs *e) +{ + struct section *sec; + char *name; + + sec = find_section_by_name(e->orig, ".modinfo"); + if (!sec) { + ERROR("missing .modinfo section"); + return NULL; + } + + name = memmem(sec->data->d_buf, sec_size(sec), "\0name=", 6); + if (name) + return name + 6; + + name = strdup(e->orig->name); + if (!name) { + ERROR_GLIBC("strdup"); + return NULL; + } + + for (char *c = name; *c; c++) { + if (*c == '/') + name = c + 1; + else if (*c == '-') + *c = '_'; + else if (*c == '.') { + *c = '\0'; + break; + } + } + + return name; +} + +/* Get the object's module name as defined by the kernel (and klp_object) */ +static const char *find_modname(struct elfs *e) +{ + const char *modname; + + if (e->modname) + return e->modname; + + modname = __find_modname(e); + e->modname = modname; + return modname; +} + +/* + * Copying a function from its native compiled environment to a kernel module + * removes its natural access to local functions/variables and unexported + * globals. References to such symbols need to be converted to KLP relocs so + * the kernel arch relocation code knows to apply them and where to find the + * symbols. Particularly, duplicate static symbols need to be disambiguated. + */ +static bool klp_reloc_needed(struct reloc *patched_reloc) +{ + struct symbol *patched_sym = patched_reloc->sym; + struct export *export; + + /* no external symbol to reference */ + if (dont_correlate(patched_sym)) + return false; + + /* For included functions, a regular reloc will do. */ + if (patched_sym->included) + return false; + + /* + * If exported by a module, it has to be a klp reloc. Thanks to the + * clusterfunk that is late module patching, the patch module is + * allowed to be loaded before any modules it depends on. + * + * If exported by vmlinux, a normal reloc will do. + */ + export = find_export(patched_sym); + if (export) + return strcmp(export->mod, "vmlinux"); + + if (!patched_sym->twin) { + /* + * Presumably the symbol and its reference were added by the + * patch. The symbol could be defined in this .o or in another + * .o in the patch module. + * + * This check needs to be *after* the export check due to the + * possibility of the patch adding a new UNDEF reference to an + * exported symbol. + */ + return false; + } + + /* Unexported symbol which lives in the original vmlinux or module. */ + return true; +} + +static int convert_reloc_sym_to_secsym(struct elf *elf, struct reloc *reloc) +{ + struct symbol *sym = reloc->sym; + struct section *sec = sym->sec; + + if (!sec->sym && !elf_create_section_symbol(elf, sec)) + return -1; + + reloc->sym = sec->sym; + set_reloc_sym(elf, reloc, sym->idx); + set_reloc_addend(elf, reloc, sym->offset + reloc_addend(reloc)); + return 0; +} + +static int convert_reloc_secsym_to_sym(struct elf *elf, struct reloc *reloc) +{ + struct symbol *sym = reloc->sym; + struct section *sec = sym->sec; + + /* If the symbol has a dedicated section, it's easy to find */ + sym = find_symbol_by_offset(sec, 0); + if (sym && sym->len == sec_size(sec)) + goto found_sym; + + /* No dedicated section; find the symbol manually */ + sym = find_symbol_containing(sec, arch_adjusted_addend(reloc)); + if (!sym) { + /* + * This can happen for special section references to weak code + * whose symbol has been stripped by the linker. + */ + return -1; + } + +found_sym: + reloc->sym = sym; + set_reloc_sym(elf, reloc, sym->idx); + set_reloc_addend(elf, reloc, reloc_addend(reloc) - sym->offset); + return 0; +} + +/* + * Convert a relocation symbol reference to the needed format: either a section + * symbol or the underlying symbol itself. + */ +static int convert_reloc_sym(struct elf *elf, struct reloc *reloc) +{ + if (is_reloc_allowed(reloc)) + return 0; + + if (section_reference_needed(reloc->sym->sec)) + return convert_reloc_sym_to_secsym(elf, reloc); + else + return convert_reloc_secsym_to_sym(elf, reloc); +} + +/* + * Convert a regular relocation to a klp relocation (sort of). + */ +static int clone_reloc_klp(struct elfs *e, struct reloc *patched_reloc, + struct section *sec, unsigned long offset, + struct export *export) +{ + struct symbol *patched_sym = patched_reloc->sym; + s64 addend = reloc_addend(patched_reloc); + const char *sym_modname, *sym_orig_name; + static struct section *klp_relocs; + struct symbol *sym, *klp_sym; + unsigned long klp_reloc_off; + char sym_name[SYM_NAME_LEN]; + struct klp_reloc klp_reloc; + unsigned long sympos; + + if (!patched_sym->twin) { + ERROR("unexpected klp reloc for new symbol %s", patched_sym->name); + return -1; + } + + /* + * Keep the original reloc intact for now to avoid breaking objtool run + * which relies on proper relocations for many of its features. This + * will be disabled later by "objtool klp post-link". + * + * Convert it to UNDEF (and WEAK to avoid modpost warnings). + */ + + sym = patched_sym->clone; + if (!sym) { + /* STB_WEAK: avoid modpost undefined symbol warnings */ + sym = elf_create_symbol(e->out, patched_sym->name, NULL, + STB_WEAK, patched_sym->type, 0, 0); + if (!sym) + return -1; + + patched_sym->clone = sym; + sym->clone = patched_sym; + } + + if (!elf_create_reloc(e->out, sec, offset, sym, addend, reloc_type(patched_reloc))) + return -1; + + /* + * Create the KLP symbol. + */ + + if (export) { + sym_modname = export->mod; + sym_orig_name = export->sym; + sympos = 0; + } else { + sym_modname = find_modname(e); + if (!sym_modname) + return -1; + + sym_orig_name = patched_sym->twin->name; + sympos = find_sympos(e->orig, patched_sym->twin); + if (sympos == ULONG_MAX) + return -1; + } + + /* symbol format: .klp.sym.modname.sym_name,sympos */ + if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_SYM_PREFIX "%s.%s,%ld", + sym_modname, sym_orig_name, sympos)) + return -1; + + klp_sym = find_symbol_by_name(e->out, sym_name); + if (!klp_sym) { + /* STB_WEAK: avoid modpost undefined symbol warnings */ + klp_sym = elf_create_symbol(e->out, sym_name, NULL, + STB_WEAK, patched_sym->type, 0, 0); + if (!klp_sym) + return -1; + } + + /* + * Create the __klp_relocs entry. This will be converted to an actual + * KLP rela by "objtool klp post-link". + * + * This intermediate step is necessary to prevent corruption by the + * linker, which doesn't know how to properly handle two rela sections + * applying to the same base section. + */ + + if (!klp_relocs) { + klp_relocs = elf_create_section(e->out, KLP_RELOCS_SEC, 0, + 0, SHT_PROGBITS, 8, SHF_ALLOC); + if (!klp_relocs) + return -1; + } + + klp_reloc_off = sec_size(klp_relocs); + memset(&klp_reloc, 0, sizeof(klp_reloc)); + + klp_reloc.type = reloc_type(patched_reloc); + if (!elf_add_data(e->out, klp_relocs, &klp_reloc, sizeof(klp_reloc))) + return -1; + + /* klp_reloc.offset */ + if (!sec->sym && !elf_create_section_symbol(e->out, sec)) + return -1; + + if (!elf_create_reloc(e->out, klp_relocs, + klp_reloc_off + offsetof(struct klp_reloc, offset), + sec->sym, offset, R_ABS64)) + return -1; + + /* klp_reloc.sym */ + if (!elf_create_reloc(e->out, klp_relocs, + klp_reloc_off + offsetof(struct klp_reloc, sym), + klp_sym, addend, R_ABS64)) + return -1; + + return 0; +} + +/* Copy a reloc and its symbol to the output object */ +static int clone_reloc(struct elfs *e, struct reloc *patched_reloc, + struct section *sec, unsigned long offset) +{ + struct symbol *patched_sym = patched_reloc->sym; + struct export *export = find_export(patched_sym); + long addend = reloc_addend(patched_reloc); + struct symbol *out_sym; + bool klp; + + if (!is_reloc_allowed(patched_reloc)) { + ERROR_FUNC(patched_reloc->sec->base, reloc_offset(patched_reloc), + "missing symbol for reference to %s+%ld", + patched_sym->name, addend); + return -1; + } + + klp = klp_reloc_needed(patched_reloc); + + if (klp) { + if (clone_reloc_klp(e, patched_reloc, sec, offset, export)) + return -1; + + return 0; + } + + /* + * Why !export sets 'data_too': + * + * Unexported non-klp symbols need to live in the patch module, + * otherwise there will be unresolved symbols. Notably, this includes: + * + * - New functions/data + * - String sections + * - Special section entries + * - Uncorrelated static local variables + * - UBSAN sections + */ + out_sym = clone_symbol(e, patched_sym, patched_sym->included || !export); + if (!out_sym) + return -1; + + /* + * For strings, all references use section symbols, thanks to + * section_reference_needed(). clone_symbol() has cloned an empty + * version of the string section. Now copy the string itself. + */ + if (is_string_sec(patched_sym->sec)) { + const char *str = patched_sym->sec->data->d_buf + addend; + + addend = elf_add_string(e->out, out_sym->sec, str); + if (addend == -1) + return -1; + } + + if (!elf_create_reloc(e->out, sec, offset, out_sym, addend, + reloc_type(patched_reloc))) + return -1; + + return 0; +} + +/* Copy all relocs needed for a symbol's contents */ +static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym) +{ + struct section *patched_rsec = patched_sym->sec->rsec; + struct reloc *patched_reloc; + unsigned long start, end; + struct symbol *out_sym; + + out_sym = patched_sym->clone; + if (!out_sym) { + ERROR("no clone for %s", patched_sym->name); + return -1; + } + + if (!patched_rsec) + return 0; + + if (!is_sec_sym(patched_sym) && !patched_sym->len) + return 0; + + if (is_string_sec(patched_sym->sec)) + return 0; + + if (is_sec_sym(patched_sym)) { + start = 0; + end = sec_size(patched_sym->sec); + } else { + start = patched_sym->offset; + end = start + patched_sym->len; + } + + for_each_reloc(patched_rsec, patched_reloc) { + unsigned long offset; + + if (reloc_offset(patched_reloc) < start || + reloc_offset(patched_reloc) >= end) + continue; + + /* + * Skip any reloc referencing .altinstr_aux. Its code is + * always patched by alternatives. See ALTERNATIVE_TERNARY(). + */ + if (patched_reloc->sym->sec && + !strcmp(patched_reloc->sym->sec->name, ".altinstr_aux")) + continue; + + if (convert_reloc_sym(e->patched, patched_reloc)) { + ERROR_FUNC(patched_rsec->base, reloc_offset(patched_reloc), + "failed to convert reloc sym '%s' to its proper format", + patched_reloc->sym->name); + return -1; + } + + offset = out_sym->offset + (reloc_offset(patched_reloc) - patched_sym->offset); + + if (clone_reloc(e, patched_reloc, out_sym->sec, offset)) + return -1; + } + return 0; + +} + +static int create_fake_symbol(struct elf *elf, struct section *sec, + unsigned long offset, size_t size) +{ + char name[SYM_NAME_LEN]; + unsigned int type; + static int ctr; + char *c; + + if (snprintf_check(name, SYM_NAME_LEN, "%s_%d", sec->name, ctr++)) + return -1; + + for (c = name; *c; c++) + if (*c == '.') + *c = '_'; + + /* + * STT_NOTYPE: Prevent objtool from validating .altinstr_replacement + * while still allowing objdump to disassemble it. + */ + type = is_text_sec(sec) ? STT_NOTYPE : STT_OBJECT; + return elf_create_symbol(elf, name, sec, STB_LOCAL, type, offset, size) ? 0 : -1; +} + +/* + * Special sections (alternatives, etc) are basically arrays of structs. + * For all the special sections, create a symbol for each struct entry. This + * is a bit cumbersome, but it makes the extracting of the individual entries + * much more straightforward. + * + * There are three ways to identify the entry sizes for a special section: + * + * 1) ELF section header sh_entsize: Ideally this would be used almost + * everywhere. But unfortunately the toolchains make it difficult. The + * assembler .[push]section directive syntax only takes entsize when + * combined with SHF_MERGE. But Clang disallows combining SHF_MERGE with + * SHF_WRITE. And some special sections do need to be writable. + * + * Another place this wouldn't work is .altinstr_replacement, whose entries + * don't have a fixed size. + * + * 2) ANNOTATE_DATA_SPECIAL: This is a lightweight objtool annotation which + * points to the beginning of each entry. The size of the entry is then + * inferred by the location of the subsequent annotation (or end of + * section). + * + * 3) Simple array of pointers: If the special section is just a basic array of + * pointers, the entry size can be inferred by the number of relocations. + * No annotations needed. + * + * Note I also tried to create per-entry symbols at the time of creation, in + * the original [inline] asm. Unfortunately, creating uniquely named symbols + * is trickier than one might think, especially with Clang inline asm. I + * eventually just gave up trying to make that work, in favor of using + * ANNOTATE_DATA_SPECIAL and creating the symbols here after the fact. + */ +static int create_fake_symbols(struct elf *elf) +{ + struct section *sec; + struct reloc *reloc; + + /* + * 1) Make symbols for all the ANNOTATE_DATA_SPECIAL entries: + */ + + sec = find_section_by_name(elf, ".discard.annotate_data"); + if (!sec || !sec->rsec) + return 0; + + for_each_reloc(sec->rsec, reloc) { + unsigned long offset, size; + struct reloc *next_reloc; + + if (annotype(elf, sec, reloc) != ANNOTYPE_DATA_SPECIAL) + continue; + + offset = reloc_addend(reloc); + + size = 0; + next_reloc = reloc; + for_each_reloc_continue(sec->rsec, next_reloc) { + if (annotype(elf, sec, next_reloc) != ANNOTYPE_DATA_SPECIAL || + next_reloc->sym->sec != reloc->sym->sec) + continue; + + size = reloc_addend(next_reloc) - offset; + break; + } + + if (!size) + size = sec_size(reloc->sym->sec) - offset; + + if (create_fake_symbol(elf, reloc->sym->sec, offset, size)) + return -1; + } + + /* + * 2) Make symbols for sh_entsize, and simple arrays of pointers: + */ + + for_each_sec(elf, sec) { + unsigned int entry_size; + unsigned long offset; + + if (!is_special_section(sec) || find_symbol_by_offset(sec, 0)) + continue; + + if (!sec->rsec) { + ERROR("%s: missing special section relocations", sec->name); + return -1; + } + + entry_size = sec->sh.sh_entsize; + if (!entry_size) { + entry_size = arch_reloc_size(sec->rsec->relocs); + if (sec_size(sec) != entry_size * sec_num_entries(sec->rsec)) { + ERROR("%s: missing special section entsize or annotations", sec->name); + return -1; + } + } + + for (offset = 0; offset < sec_size(sec); offset += entry_size) { + if (create_fake_symbol(elf, sec, offset, entry_size)) + return -1; + } + } + + return 0; +} + +/* Keep a special section entry if it references an included function */ +static bool should_keep_special_sym(struct elf *elf, struct symbol *sym) +{ + struct reloc *reloc; + + if (is_sec_sym(sym) || !sym->sec->rsec) + return false; + + sym_for_each_reloc(elf, sym, reloc) { + if (convert_reloc_sym(elf, reloc)) + continue; + + if (is_func_sym(reloc->sym) && reloc->sym->included) + return true; + } + + return false; +} + +/* + * Klp relocations aren't allowed for __jump_table and .static_call_sites if + * the referenced symbol lives in a kernel module, because such klp relocs may + * be applied after static branch/call init, resulting in code corruption. + * + * Validate a special section entry to avoid that. Note that an inert + * tracepoint is harmless enough, in that case just skip the entry and print a + * warning. Otherwise, return an error. + * + * This is only a temporary limitation which will be fixed when livepatch adds + * support for submodules: fully self-contained modules which are embedded in + * the top-level livepatch module's data and which can be loaded on demand when + * their corresponding to-be-patched module gets loaded. Then klp relocs can + * be retired. + * + * Return: + * -1: error: validation failed + * 1: warning: tracepoint skipped + * 0: success + */ +static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym) +{ + bool static_branch = !strcmp(sym->sec->name, "__jump_table"); + bool static_call = !strcmp(sym->sec->name, ".static_call_sites"); + struct symbol *code_sym = NULL; + unsigned long code_offset = 0; + struct reloc *reloc; + int ret = 0; + + if (!static_branch && !static_call) + return 0; + + sym_for_each_reloc(e->patched, sym, reloc) { + const char *sym_modname; + struct export *export; + + /* Static branch/call keys are always STT_OBJECT */ + if (reloc->sym->type != STT_OBJECT) { + + /* Save code location which can be printed below */ + if (reloc->sym->type == STT_FUNC && !code_sym) { + code_sym = reloc->sym; + code_offset = reloc_addend(reloc); + } + + continue; + } + + if (!klp_reloc_needed(reloc)) + continue; + + export = find_export(reloc->sym); + if (export) { + sym_modname = export->mod; + } else { + sym_modname = find_modname(e); + if (!sym_modname) + return -1; + } + + /* vmlinux keys are ok */ + if (!strcmp(sym_modname, "vmlinux")) + continue; + + if (static_branch) { + if (strstarts(reloc->sym->name, "__tracepoint_")) { + WARN("%s: disabling unsupported tracepoint %s", + code_sym->name, reloc->sym->name + 13); + ret = 1; + continue; + } + + ERROR("%s+0x%lx: unsupported static branch key %s. Use static_key_enabled() instead", + code_sym->name, code_offset, reloc->sym->name); + return -1; + } + + /* static call */ + if (strstarts(reloc->sym->name, "__SCK__tp_func_")) { + ret = 1; + continue; + } + + ERROR("%s()+0x%lx: unsupported static call key %s. Use KLP_STATIC_CALL() instead", + code_sym->name, code_offset, reloc->sym->name); + return -1; + } + + return ret; +} + +static int clone_special_section(struct elfs *e, struct section *patched_sec) +{ + struct symbol *patched_sym; + + /* + * Extract all special section symbols (and their dependencies) which + * reference included functions. + */ + sec_for_each_sym(patched_sec, patched_sym) { + int ret; + + if (!is_object_sym(patched_sym)) + continue; + + if (!should_keep_special_sym(e->patched, patched_sym)) + continue; + + ret = validate_special_section_klp_reloc(e, patched_sym); + if (ret < 0) + return -1; + if (ret > 0) + continue; + + if (!clone_symbol(e, patched_sym, true)) + return -1; + } + + return 0; +} + +/* Extract only the needed bits from special sections */ +static int clone_special_sections(struct elfs *e) +{ + struct section *patched_sec; + + if (create_fake_symbols(e->patched)) + return -1; + + for_each_sec(e->patched, patched_sec) { + if (is_special_section(patched_sec)) { + if (clone_special_section(e, patched_sec)) + return -1; + } + } + + return 0; +} + +/* + * Create __klp_objects and __klp_funcs sections which are intermediate + * sections provided as input to the patch module's init code for building the + * klp_patch, klp_object and klp_func structs for the livepatch API. + */ +static int create_klp_sections(struct elfs *e) +{ + size_t obj_size = sizeof(struct klp_object_ext); + size_t func_size = sizeof(struct klp_func_ext); + struct section *obj_sec, *funcs_sec, *str_sec; + struct symbol *funcs_sym, *str_sym, *sym; + char sym_name[SYM_NAME_LEN]; + unsigned int nr_funcs = 0; + const char *modname; + void *obj_data; + s64 addend; + + obj_sec = elf_create_section_pair(e->out, KLP_OBJECTS_SEC, obj_size, 0, 0); + if (!obj_sec) + return -1; + + funcs_sec = elf_create_section_pair(e->out, KLP_FUNCS_SEC, func_size, 0, 0); + if (!funcs_sec) + return -1; + + funcs_sym = elf_create_section_symbol(e->out, funcs_sec); + if (!funcs_sym) + return -1; + + str_sec = elf_create_section(e->out, KLP_STRINGS_SEC, 0, 0, + SHT_PROGBITS, 1, + SHF_ALLOC | SHF_STRINGS | SHF_MERGE); + if (!str_sec) + return -1; + + if (elf_add_string(e->out, str_sec, "") == -1) + return -1; + + str_sym = elf_create_section_symbol(e->out, str_sec); + if (!str_sym) + return -1; + + /* allocate klp_object_ext */ + obj_data = elf_add_data(e->out, obj_sec, NULL, obj_size); + if (!obj_data) + return -1; + + modname = find_modname(e); + if (!modname) + return -1; + + /* klp_object_ext.name */ + if (strcmp(modname, "vmlinux")) { + addend = elf_add_string(e->out, str_sec, modname); + if (addend == -1) + return -1; + + if (!elf_create_reloc(e->out, obj_sec, + offsetof(struct klp_object_ext, name), + str_sym, addend, R_ABS64)) + return -1; + } + + /* klp_object_ext.funcs */ + if (!elf_create_reloc(e->out, obj_sec, offsetof(struct klp_object_ext, funcs), + funcs_sym, 0, R_ABS64)) + return -1; + + for_each_sym(e->out, sym) { + unsigned long offset = nr_funcs * func_size; + unsigned long sympos; + void *func_data; + + if (!is_func_sym(sym) || sym->cold || !sym->clone || !sym->clone->changed) + continue; + + /* allocate klp_func_ext */ + func_data = elf_add_data(e->out, funcs_sec, NULL, func_size); + if (!func_data) + return -1; + + /* klp_func_ext.old_name */ + addend = elf_add_string(e->out, str_sec, sym->clone->twin->name); + if (addend == -1) + return -1; + + if (!elf_create_reloc(e->out, funcs_sec, + offset + offsetof(struct klp_func_ext, old_name), + str_sym, addend, R_ABS64)) + return -1; + + /* klp_func_ext.new_func */ + if (!elf_create_reloc(e->out, funcs_sec, + offset + offsetof(struct klp_func_ext, new_func), + sym, 0, R_ABS64)) + return -1; + + /* klp_func_ext.sympos */ + BUILD_BUG_ON(sizeof(sympos) != sizeof_field(struct klp_func_ext, sympos)); + sympos = find_sympos(e->orig, sym->clone->twin); + if (sympos == ULONG_MAX) + return -1; + memcpy(func_data + offsetof(struct klp_func_ext, sympos), &sympos, + sizeof_field(struct klp_func_ext, sympos)); + + nr_funcs++; + } + + /* klp_object_ext.nr_funcs */ + BUILD_BUG_ON(sizeof(nr_funcs) != sizeof_field(struct klp_object_ext, nr_funcs)); + memcpy(obj_data + offsetof(struct klp_object_ext, nr_funcs), &nr_funcs, + sizeof_field(struct klp_object_ext, nr_funcs)); + + /* + * Find callback pointers created by KLP_PRE_PATCH_CALLBACK() and + * friends, and add them to the klp object. + */ + + if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_PRE_PATCH_PREFIX "%s", modname)) + return -1; + + sym = find_symbol_by_name(e->out, sym_name); + if (sym) { + struct reloc *reloc; + + reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset); + + if (!elf_create_reloc(e->out, obj_sec, + offsetof(struct klp_object_ext, callbacks) + + offsetof(struct klp_callbacks, pre_patch), + reloc->sym, reloc_addend(reloc), R_ABS64)) + return -1; + } + + if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_POST_PATCH_PREFIX "%s", modname)) + return -1; + + sym = find_symbol_by_name(e->out, sym_name); + if (sym) { + struct reloc *reloc; + + reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset); + + if (!elf_create_reloc(e->out, obj_sec, + offsetof(struct klp_object_ext, callbacks) + + offsetof(struct klp_callbacks, post_patch), + reloc->sym, reloc_addend(reloc), R_ABS64)) + return -1; + } + + if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_PRE_UNPATCH_PREFIX "%s", modname)) + return -1; + + sym = find_symbol_by_name(e->out, sym_name); + if (sym) { + struct reloc *reloc; + + reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset); + + if (!elf_create_reloc(e->out, obj_sec, + offsetof(struct klp_object_ext, callbacks) + + offsetof(struct klp_callbacks, pre_unpatch), + reloc->sym, reloc_addend(reloc), R_ABS64)) + return -1; + } + + if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_POST_UNPATCH_PREFIX "%s", modname)) + return -1; + + sym = find_symbol_by_name(e->out, sym_name); + if (sym) { + struct reloc *reloc; + + reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset); + + if (!elf_create_reloc(e->out, obj_sec, + offsetof(struct klp_object_ext, callbacks) + + offsetof(struct klp_callbacks, post_unpatch), + reloc->sym, reloc_addend(reloc), R_ABS64)) + return -1; + } + + return 0; +} + +/* + * Copy all .modinfo import_ns= tags to ensure all namespaced exported symbols + * can be accessed via normal relocs. + */ +static int copy_import_ns(struct elfs *e) +{ + struct section *patched_sec, *out_sec = NULL; + char *import_ns, *data_end; + + patched_sec = find_section_by_name(e->patched, ".modinfo"); + if (!patched_sec) + return 0; + + import_ns = patched_sec->data->d_buf; + if (!import_ns) + return 0; + + for (data_end = import_ns + sec_size(patched_sec); + import_ns < data_end; + import_ns += strlen(import_ns) + 1) { + + import_ns = memmem(import_ns, data_end - import_ns, "import_ns=", 10); + if (!import_ns) + return 0; + + if (!out_sec) { + out_sec = find_section_by_name(e->out, ".modinfo"); + if (!out_sec) { + out_sec = elf_create_section(e->out, ".modinfo", 0, + patched_sec->sh.sh_entsize, + patched_sec->sh.sh_type, + patched_sec->sh.sh_addralign, + patched_sec->sh.sh_flags); + if (!out_sec) + return -1; + } + } + + if (!elf_add_data(e->out, out_sec, import_ns, strlen(import_ns) + 1)) + return -1; + } + + return 0; +} + +int cmd_klp_diff(int argc, const char **argv) +{ + struct elfs e = {0}; + + argc = parse_options(argc, argv, klp_diff_options, klp_diff_usage, 0); + if (argc != 3) + usage_with_options(klp_diff_usage, klp_diff_options); + + objname = argv[0]; + + e.orig = elf_open_read(argv[0], O_RDONLY); + e.patched = elf_open_read(argv[1], O_RDONLY); + e.out = NULL; + + if (!e.orig || !e.patched) + return -1; + + if (read_exports()) + return -1; + + if (read_sym_checksums(e.orig)) + return -1; + + if (read_sym_checksums(e.patched)) + return -1; + + if (correlate_symbols(&e)) + return -1; + + if (mark_changed_functions(&e)) + return 0; + + e.out = elf_create_file(&e.orig->ehdr, argv[2]); + if (!e.out) + return -1; + + if (clone_included_functions(&e)) + return -1; + + if (clone_special_sections(&e)) + return -1; + + if (create_klp_sections(&e)) + return -1; + + if (copy_import_ns(&e)) + return -1; + + if (elf_write(e.out)) + return -1; + + return elf_close(e.out); +} diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 5c8b974ad0f9..c8f611c1320d 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -16,8 +16,6 @@ #include #include -bool help; - static struct objtool_file file; struct objtool_file *objtool_open_read(const char *filename) @@ -71,6 +69,39 @@ int objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) return 0; } +char *top_level_dir(const char *file) +{ + ssize_t len, self_len, file_len; + char self[PATH_MAX], *str; + int i; + + len = readlink("/proc/self/exe", self, sizeof(self) - 1); + if (len <= 0) + return NULL; + self[len] = '\0'; + + for (i = 0; i < 3; i++) { + char *s = strrchr(self, '/'); + if (!s) + return NULL; + *s = '\0'; + } + + self_len = strlen(self); + file_len = strlen(file); + + str = malloc(self_len + file_len + 2); + if (!str) + return NULL; + + memcpy(str, self, self_len); + str[self_len] = '/'; + strcpy(str + self_len + 1, file); + + return str; +} + + int main(int argc, const char **argv) { static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED"; @@ -79,5 +110,11 @@ int main(int argc, const char **argv) exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED); pager_init(UNUSED); + if (argc > 1 && !strcmp(argv[1], "klp")) { + argc--; + argv++; + return cmd_klp(argc, argv); + } + return objtool_run(argc, argv); } diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 86d64e3ac6f7..e38167ca56a9 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -17,6 +17,7 @@ arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk include/linux/interval_tree_generic.h +include/linux/livepatch_external.h include/linux/static_call_types.h " diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c index d83f607733b0..d6562f292259 100644 --- a/tools/objtool/weak.c +++ b/tools/objtool/weak.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #define UNSUPPORTED(name) \ ({ \ @@ -24,3 +26,8 @@ int __weak orc_create(struct objtool_file *file) { UNSUPPORTED("ORC"); } + +int __weak cmd_klp(int argc, const char **argv) +{ + UNSUPPORTED("klp"); +} -- cgit v1.2.3 From f2c356d1d0f048e88c281a4178c8b2db138d3ac1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:05 -0700 Subject: kbuild,objtool: Defer objtool validation step for CONFIG_KLP_BUILD In preparation for klp-build, defer objtool validation for CONFIG_KLP_BUILD kernels until the final pre-link archive (e.g., vmlinux.o, module-foo.o) is built. This will simplify the process of generating livepatch modules. Delayed objtool is generally preferred anyway, and is already standard for IBT and LTO. Eventually the per-translation-unit mode will be phased out. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/Makefile.lib | 2 +- scripts/link-vmlinux.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 15fee73e9289..28a1c08e3b22 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -197,7 +197,7 @@ objtool-args = $(objtool-args-y) \ $(if $(delay-objtool), --link) \ $(if $(part-of-module), --module) -delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT)) +delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT),$(CONFIG_KLP_BUILD)) cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@) cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 433849ff7529..2df714ba51a9 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -60,7 +60,8 @@ vmlinux_link() # skip output file argument shift - if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then + if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT || + is_enabled CONFIG_KLP_BUILD; then # Use vmlinux.o instead of performing the slow LTO link again. objs=vmlinux.o libs= -- cgit v1.2.3 From abaf1f42ddd070662fb419aed29c985ea209bd88 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:06 -0700 Subject: livepatch/klp-build: Introduce fix-patch-lines script to avoid __LINE__ diff noise The __LINE__ macro creates challenges for binary diffing. When a .patch file adds or removes lines, it shifts the line numbers for all code below it. This can cause the code generation of functions using __LINE__ to change due to the line number constant being embedded in a MOV instruction, despite there being no semantic difference. Avoid such false positives by adding a fix-patch-lines script which can be used to insert a #line directive in each patch hunk affecting the line numbering. This script will be used by klp-build, which will be introduced in a subsequent patch. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- MAINTAINERS | 1 + scripts/livepatch/fix-patch-lines | 79 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100755 scripts/livepatch/fix-patch-lines (limited to 'scripts') diff --git a/MAINTAINERS b/MAINTAINERS index 755e2528f839..fc573e9a523c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14443,6 +14443,7 @@ F: include/linux/livepatch*.h F: kernel/livepatch/ F: kernel/module/livepatch.c F: samples/livepatch/ +F: scripts/livepatch/ F: tools/testing/selftests/livepatch/ LLC (802.2) diff --git a/scripts/livepatch/fix-patch-lines b/scripts/livepatch/fix-patch-lines new file mode 100755 index 000000000000..73c5e3dea46e --- /dev/null +++ b/scripts/livepatch/fix-patch-lines @@ -0,0 +1,79 @@ +#!/usr/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 +# +# Use #line directives to preserve original __LINE__ numbers across patches to +# avoid unwanted compilation changes. + +BEGIN { + in_hunk = 0 + skip = 0 +} + +/^--- / { + skip = $2 !~ /\.(c|h)$/ + print + next +} + +/^@@/ { + if (skip) { + print + next + } + + in_hunk = 1 + + # for @@ -1,3 +1,4 @@: + # 1: line number in old file + # 3: how many lines the hunk covers in old file + # 1: line number in new file + # 4: how many lines the hunk covers in new file + + match($0, /^@@ -([0-9]+)(,([0-9]+))? \+([0-9]+)(,([0-9]+))? @@/, m) + + # Set 'cur' to the old file's line number at the start of the hunk. It + # gets incremented for every context line and every line removal, so + # that it always represents the old file's current line number. + cur = m[1] + + # last = last line number of current hunk + last = cur + (m[3] ? m[3] : 1) - 1 + + need_line_directive = 0 + + print + next +} + +{ + if (skip || !in_hunk || $0 ~ /^\\ No newline at end of file/) { + print + next + } + + # change line + if ($0 ~ /^[+-]/) { + # inject #line after this group of changes + need_line_directive = 1 + + if ($0 ~ /^-/) + cur++ + + print + next + } + + # If this is the first context line after a group of changes, inject + # the #line directive to force the compiler to correct the line + # numbering to match the original file. + if (need_line_directive) { + print "+#line " cur + need_line_directive = 0 + } + + if (cur == last) + in_hunk = 0 + + cur++ + print +} -- cgit v1.2.3 From 59adee07b568fb78e2bf07df1f22f3fe45b7240a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:07 -0700 Subject: livepatch/klp-build: Add stub init code for livepatch modules Add a module initialization stub which can be linked with binary diff objects to produce a livepatch module. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/livepatch/init.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 scripts/livepatch/init.c (limited to 'scripts') diff --git a/scripts/livepatch/init.c b/scripts/livepatch/init.c new file mode 100644 index 000000000000..2274d8f5a482 --- /dev/null +++ b/scripts/livepatch/init.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Init code for a livepatch kernel module + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +extern struct klp_object_ext __start_klp_objects[]; +extern struct klp_object_ext __stop_klp_objects[]; + +static struct klp_patch *patch; + +static int __init livepatch_mod_init(void) +{ + struct klp_object *objs; + unsigned int nr_objs; + int ret; + + nr_objs = __stop_klp_objects - __start_klp_objects; + + if (!nr_objs) { + pr_err("nothing to patch!\n"); + ret = -EINVAL; + goto err; + } + + patch = kzalloc(sizeof(*patch), GFP_KERNEL); + if (!patch) { + ret = -ENOMEM; + goto err; + } + + objs = kzalloc(sizeof(struct klp_object) * (nr_objs + 1), GFP_KERNEL); + if (!objs) { + ret = -ENOMEM; + goto err_free_patch; + } + + for (int i = 0; i < nr_objs; i++) { + struct klp_object_ext *obj_ext = __start_klp_objects + i; + struct klp_func_ext *funcs_ext = obj_ext->funcs; + unsigned int nr_funcs = obj_ext->nr_funcs; + struct klp_func *funcs = objs[i].funcs; + struct klp_object *obj = objs + i; + + funcs = kzalloc(sizeof(struct klp_func) * (nr_funcs + 1), GFP_KERNEL); + if (!funcs) { + ret = -ENOMEM; + for (int j = 0; j < i; j++) + kfree(objs[i].funcs); + goto err_free_objs; + } + + for (int j = 0; j < nr_funcs; j++) { + funcs[j].old_name = funcs_ext[j].old_name; + funcs[j].new_func = funcs_ext[j].new_func; + funcs[j].old_sympos = funcs_ext[j].sympos; + } + + obj->name = obj_ext->name; + obj->funcs = funcs; + + memcpy(&obj->callbacks, &obj_ext->callbacks, sizeof(struct klp_callbacks)); + } + + patch->mod = THIS_MODULE; + patch->objs = objs; + + /* TODO patch->states */ + +#ifdef KLP_NO_REPLACE + patch->replace = false; +#else + patch->replace = true; +#endif + + return klp_enable_patch(patch); + +err_free_objs: + kfree(objs); +err_free_patch: + kfree(patch); +err: + return ret; +} + +static void __exit livepatch_mod_exit(void) +{ + unsigned int nr_objs; + + nr_objs = __stop_klp_objects - __start_klp_objects; + + for (int i = 0; i < nr_objs; i++) + kfree(patch->objs[i].funcs); + + kfree(patch->objs); + kfree(patch); +} + +module_init(livepatch_mod_init); +module_exit(livepatch_mod_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_DESCRIPTION("Livepatch module"); -- cgit v1.2.3 From 24ebfcd65a871df4555b98c49c9ed9a92f146113 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:08 -0700 Subject: livepatch/klp-build: Introduce klp-build script for generating livepatch modules Add a klp-build script which automates the generation of a livepatch module from a source .patch file by performing the following steps: - Builds an original kernel with -function-sections and -fdata-sections, plus objtool function checksumming. - Applies the .patch file and rebuilds the kernel using the same options. - Runs 'objtool klp diff' to detect changed functions and generate intermediate binary diff objects. - Builds a kernel module which links the diff objects with some livepatch module init code (scripts/livepatch/init.c). - Finalizes the livepatch module (aka work around linker wreckage) using 'objtool klp post-link'. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/Makefile.lib | 1 + scripts/livepatch/fix-patch-lines | 2 +- scripts/livepatch/klp-build | 743 ++++++++++++++++++++++++++++++++++++++ tools/objtool/klp-diff.c | 6 +- 4 files changed, 749 insertions(+), 3 deletions(-) create mode 100755 scripts/livepatch/klp-build (limited to 'scripts') diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 28a1c08e3b22..f4b33919ec37 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -173,6 +173,7 @@ ifdef CONFIG_OBJTOOL objtool := $(objtree)/tools/objtool/objtool +objtool-args-$(CONFIG_KLP_BUILD) += --checksum objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr objtool-args-$(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) += --hacks=skylake diff --git a/scripts/livepatch/fix-patch-lines b/scripts/livepatch/fix-patch-lines index 73c5e3dea46e..fa7d4f6592e6 100755 --- a/scripts/livepatch/fix-patch-lines +++ b/scripts/livepatch/fix-patch-lines @@ -23,7 +23,7 @@ BEGIN { in_hunk = 1 - # for @@ -1,3 +1,4 @@: + # @@ -1,3 +1,4 @@: # 1: line number in old file # 3: how many lines the hunk covers in old file # 1: line number in new file diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build new file mode 100755 index 000000000000..01ed0b66bfaf --- /dev/null +++ b/scripts/livepatch/klp-build @@ -0,0 +1,743 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Build a livepatch module + +# shellcheck disable=SC1090,SC2155 + +if (( BASH_VERSINFO[0] < 4 || \ + (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] < 4) )); then + echo "error: this script requires bash 4.4+" >&2 + exit 1 +fi + +set -o errexit +set -o errtrace +set -o pipefail +set -o nounset + +# Allow doing 'cmd | mapfile -t array' instead of 'mapfile -t array < <(cmd)'. +# This helps keep execution in pipes so pipefail+errexit can catch errors. +shopt -s lastpipe + +unset SKIP_CLEANUP XTRACE + +REPLACE=1 +SHORT_CIRCUIT=0 +JOBS="$(getconf _NPROCESSORS_ONLN)" +VERBOSE="-s" +shopt -o xtrace | grep -q 'on' && XTRACE=1 + +# Avoid removing the previous $TMP_DIR until args have been fully processed. +KEEP_TMP=1 + +SCRIPT="$(basename "$0")" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +FIX_PATCH_LINES="$SCRIPT_DIR/fix-patch-lines" + +SRC="$(pwd)" +OBJ="$(pwd)" + +CONFIG="$OBJ/.config" +TMP_DIR="$OBJ/klp-tmp" + +ORIG_DIR="$TMP_DIR/orig" +PATCHED_DIR="$TMP_DIR/patched" +DIFF_DIR="$TMP_DIR/diff" +KMOD_DIR="$TMP_DIR/kmod" + +STASH_DIR="$TMP_DIR/stash" +TIMESTAMP="$TMP_DIR/timestamp" +PATCH_TMP_DIR="$TMP_DIR/tmp" + +KLP_DIFF_LOG="$DIFF_DIR/diff.log" + +grep0() { + command grep "$@" || true +} + +status() { + echo "$*" +} + +warn() { + echo "error: $SCRIPT: $*" >&2 +} + +die() { + warn "$@" + exit 1 +} + +declare -a STASHED_FILES + +stash_file() { + local file="$1" + local rel_file="${file#"$SRC"/}" + + [[ ! -e "$file" ]] && die "no file to stash: $file" + + mkdir -p "$STASH_DIR/$(dirname "$rel_file")" + cp -f "$file" "$STASH_DIR/$rel_file" + + STASHED_FILES+=("$rel_file") +} + +restore_files() { + local file + + for file in "${STASHED_FILES[@]}"; do + mv -f "$STASH_DIR/$file" "$SRC/$file" || warn "can't restore file: $file" + done + + STASHED_FILES=() +} + +cleanup() { + set +o nounset + revert_patches "--recount" + restore_files + [[ "$KEEP_TMP" -eq 0 ]] && rm -rf "$TMP_DIR" + return 0 +} + +trap_err() { + warn "line ${BASH_LINENO[0]}: '$BASH_COMMAND'" +} + +trap cleanup EXIT INT TERM HUP +trap trap_err ERR + +__usage() { + cat < Build jobs to run simultaneously [default: $JOBS] + -o, --output= Output file [default: livepatch-.ko] + --no-replace Disable livepatch atomic replace + -v, --verbose Pass V=1 to kernel/module builds + +Advanced Options: + -S, --short-circuit=STEP Start at build step (requires prior --keep-tmp) + 1|orig Build original kernel (default) + 2|patched Build patched kernel + 3|diff Diff objects + 4|kmod Build patch module + -T, --keep-tmp Preserve tmp dir on exit + +EOF +} + +usage() { + __usage >&2 +} + +process_args() { + local keep_tmp=0 + local short + local long + local args + + short="hj:o:vS:T" + long="help,jobs:,output:,no-replace,verbose,short-circuit:,keep-tmp" + + args=$(getopt --options "$short" --longoptions "$long" -- "$@") || { + echo; usage; exit + } + eval set -- "$args" + + while true; do + case "$1" in + -h | --help) + usage + exit 0 + ;; + -j | --jobs) + JOBS="$2" + shift 2 + ;; + -o | --output) + [[ "$2" != *.ko ]] && die "output filename should end with .ko" + OUTFILE="$2" + NAME="$(basename "$OUTFILE")" + NAME="${NAME%.ko}" + NAME="$(module_name_string "$NAME")" + shift 2 + ;; + --no-replace) + REPLACE=0 + shift + ;; + -v | --verbose) + VERBOSE="V=1" + shift + ;; + -S | --short-circuit) + [[ ! -d "$TMP_DIR" ]] && die "--short-circuit requires preserved klp-tmp dir" + keep_tmp=1 + case "$2" in + 1 | orig) SHORT_CIRCUIT=1; ;; + 2 | patched) SHORT_CIRCUIT=2; ;; + 3 | diff) SHORT_CIRCUIT=3; ;; + 4 | mod) SHORT_CIRCUIT=4; ;; + *) die "invalid short-circuit step '$2'" ;; + esac + shift 2 + ;; + -T | --keep-tmp) + keep_tmp=1 + shift + ;; + --) + shift + break + ;; + *) + usage + exit 1 + ;; + esac + done + + if [[ $# -eq 0 ]]; then + usage + exit 1 + fi + + KEEP_TMP="$keep_tmp" + PATCHES=("$@") +} + +# temporarily disable xtrace for especially verbose code +xtrace_save() { + [[ -v XTRACE ]] && set +x + return 0 +} + +xtrace_restore() { + [[ -v XTRACE ]] && set -x + return 0 +} + +validate_config() { + xtrace_save "reading .config" + source "$CONFIG" || die "no .config file in $(dirname "$CONFIG")" + xtrace_restore + + [[ -v CONFIG_LIVEPATCH ]] || \ + die "CONFIG_LIVEPATCH not enabled" + + [[ -v CONFIG_KLP_BUILD ]] || \ + die "CONFIG_KLP_BUILD not enabled" + + [[ -v CONFIG_GCC_PLUGIN_LATENT_ENTROPY ]] && \ + die "kernel option 'CONFIG_GCC_PLUGIN_LATENT_ENTROPY' not supported" + + [[ -v CONFIG_GCC_PLUGIN_RANDSTRUCT ]] && \ + die "kernel option 'CONFIG_GCC_PLUGIN_RANDSTRUCT' not supported" + + return 0 +} + +# Only allow alphanumerics and '_' and '-' in the module name. Everything else +# is replaced with '-'. Also truncate to 55 chars so the full name + NUL +# terminator fits in the kernel's 56-byte module name array. +module_name_string() { + echo "${1//[^a-zA-Z0-9_-]/-}" | cut -c 1-55 +} + +# If the module name wasn't specified on the cmdline with --output, give it a +# name based on the patch name. +set_module_name() { + [[ -v NAME ]] && return 0 + + if [[ "${#PATCHES[@]}" -eq 1 ]]; then + NAME="$(basename "${PATCHES[0]}")" + NAME="${NAME%.*}" + else + NAME="patch" + fi + + NAME="livepatch-$NAME" + NAME="$(module_name_string "$NAME")" + + OUTFILE="$NAME.ko" +} + +# Hardcode the value printed by the localversion script to prevent patch +# application from appending it with '+' due to a dirty git working tree. +set_kernelversion() { + local file="$SRC/scripts/setlocalversion" + local localversion + + stash_file "$file" + + localversion="$(cd "$SRC" && make --no-print-directory kernelversion)" + localversion="$(cd "$SRC" && KERNELVERSION="$localversion" ./scripts/setlocalversion)" + [[ -z "$localversion" ]] && die "setlocalversion failed" + + sed -i "2i echo $localversion; exit 0" scripts/setlocalversion +} + +get_patch_files() { + local patch="$1" + + grep0 -E '^(--- |\+\+\+ )' "$patch" \ + | gawk '{print $2}' \ + | sed 's|^[^/]*/||' \ + | sort -u +} + +# Make sure git re-stats the changed files +git_refresh() { + local patch="$1" + local files=() + + [[ ! -e "$SRC/.git" ]] && return + + get_patch_files "$patch" | mapfile -t files + + ( + cd "$SRC" + git update-index -q --refresh -- "${files[@]}" + ) +} + +check_unsupported_patches() { + local patch + + for patch in "${PATCHES[@]}"; do + local files=() + + get_patch_files "$patch" | mapfile -t files + + for file in "${files[@]}"; do + case "$file" in + lib/*|*.S) + die "unsupported patch to $file" + ;; + esac + done + done +} + +apply_patch() { + local patch="$1" + shift + local extra_args=("$@") + + [[ ! -f "$patch" ]] && die "$patch doesn't exist" + + ( + cd "$SRC" + + # The sed strips the version signature from 'git format-patch', + # otherwise 'git apply --recount' warns. + sed -n '/^-- /q;p' "$patch" | + git apply "${extra_args[@]}" + ) + + APPLIED_PATCHES+=("$patch") +} + +revert_patch() { + local patch="$1" + shift + local extra_args=("$@") + local tmp=() + + ( + cd "$SRC" + + sed -n '/^-- /q;p' "$patch" | + git apply --reverse "${extra_args[@]}" + ) + git_refresh "$patch" + + for p in "${APPLIED_PATCHES[@]}"; do + [[ "$p" == "$patch" ]] && continue + tmp+=("$p") + done + + APPLIED_PATCHES=("${tmp[@]}") +} + +apply_patches() { + local patch + + for patch in "${PATCHES[@]}"; do + apply_patch "$patch" + done +} + +revert_patches() { + local extra_args=("$@") + local patches=("${APPLIED_PATCHES[@]}") + + for (( i=${#patches[@]}-1 ; i>=0 ; i-- )) ; do + revert_patch "${patches[$i]}" "${extra_args[@]}" + done + + APPLIED_PATCHES=() +} + +validate_patches() { + check_unsupported_patches + apply_patches + revert_patches +} + +do_init() { + # We're not yet smart enough to handle anything other than in-tree + # builds in pwd. + [[ ! "$SRC" -ef "$SCRIPT_DIR/../.." ]] && die "please run from the kernel root directory" + [[ ! "$OBJ" -ef "$SCRIPT_DIR/../.." ]] && die "please run from the kernel root directory" + + (( SHORT_CIRCUIT <= 1 )) && rm -rf "$TMP_DIR" + mkdir -p "$TMP_DIR" + + APPLIED_PATCHES=() + + [[ -x "$FIX_PATCH_LINES" ]] || die "can't find fix-patch-lines" + + validate_config + set_module_name + set_kernelversion +} + +# Refresh the patch hunk headers, specifically the line numbers and counts. +refresh_patch() { + local patch="$1" + local tmpdir="$PATCH_TMP_DIR" + local files=() + + rm -rf "$tmpdir" + mkdir -p "$tmpdir/a" + mkdir -p "$tmpdir/b" + + # Get all source files affected by the patch + get_patch_files "$patch" | mapfile -t files + + # Copy orig source files to 'a' + ( cd "$SRC" && echo "${files[@]}" | xargs cp --parents --target-directory="$tmpdir/a" ) + + # Copy patched source files to 'b' + apply_patch "$patch" --recount + ( cd "$SRC" && echo "${files[@]}" | xargs cp --parents --target-directory="$tmpdir/b" ) + revert_patch "$patch" --recount + + # Diff 'a' and 'b' to make a clean patch + ( cd "$tmpdir" && git diff --no-index --no-prefix a b > "$patch" ) || true +} + +# Copy the patches to a temporary directory, fix their lines so as not to +# affect the __LINE__ macro for otherwise unchanged functions further down the +# file, and update $PATCHES to point to the fixed patches. +fix_patches() { + local idx + local i + + rm -f "$TMP_DIR"/*.patch + + idx=0001 + for i in "${!PATCHES[@]}"; do + local old_patch="${PATCHES[$i]}" + local tmp_patch="$TMP_DIR/tmp.patch" + local patch="${PATCHES[$i]}" + local new_patch + + new_patch="$TMP_DIR/$idx-fixed-$(basename "$patch")" + + cp -f "$old_patch" "$tmp_patch" + refresh_patch "$tmp_patch" + "$FIX_PATCH_LINES" "$tmp_patch" > "$new_patch" + refresh_patch "$new_patch" + + PATCHES[i]="$new_patch" + + rm -f "$tmp_patch" + idx=$(printf "%04d" $(( 10#$idx + 1 ))) + done +} + +clean_kernel() { + local cmd=() + + cmd=("make") + cmd+=("--silent") + cmd+=("-j$JOBS") + cmd+=("clean") + + ( + cd "$SRC" + "${cmd[@]}" + ) +} + +build_kernel() { + local log="$TMP_DIR/build.log" + local cmd=() + + cmd=("make") + + # When a patch to a kernel module references a newly created unexported + # symbol which lives in vmlinux or another kernel module, the patched + # kernel build fails with the following error: + # + # ERROR: modpost: "klp_string" [fs/xfs/xfs.ko] undefined! + # + # The undefined symbols are working as designed in that case. They get + # resolved later when the livepatch module build link pulls all the + # disparate objects together into the same kernel module. + # + # It would be good to have a way to tell modpost to skip checking for + # undefined symbols altogether. For now, just convert the error to a + # warning with KBUILD_MODPOST_WARN, and grep out the warning to avoid + # confusing the user. + # + cmd+=("KBUILD_MODPOST_WARN=1") + + cmd+=("$VERBOSE") + cmd+=("-j$JOBS") + cmd+=("KCFLAGS=-ffunction-sections -fdata-sections") + cmd+=("vmlinux") + cmd+=("modules") + + ( + cd "$SRC" + "${cmd[@]}" \ + 1> >(tee -a "$log") \ + 2> >(tee -a "$log" | grep0 -v "modpost.*undefined!" >&2) + ) +} + +find_objects() { + local opts=("$@") + + # Find root-level vmlinux.o and non-root-level .ko files, + # excluding klp-tmp/ and .git/ + find "$OBJ" \( -path "$TMP_DIR" -o -path "$OBJ/.git" -o -regex "$OBJ/[^/][^/]*\.ko" \) -prune -o \ + -type f "${opts[@]}" \ + \( -name "*.ko" -o -path "$OBJ/vmlinux.o" \) \ + -printf '%P\n' +} + +# Copy all .o archives to $ORIG_DIR +copy_orig_objects() { + local files=() + + rm -rf "$ORIG_DIR" + mkdir -p "$ORIG_DIR" + + find_objects | mapfile -t files + + xtrace_save "copying orig objects" + for _file in "${files[@]}"; do + local rel_file="${_file/.ko/.o}" + local file="$OBJ/$rel_file" + local file_dir="$(dirname "$file")" + local orig_file="$ORIG_DIR/$rel_file" + local orig_dir="$(dirname "$orig_file")" + local cmd_file="$file_dir/.$(basename "$file").cmd" + + [[ ! -f "$file" ]] && die "missing $(basename "$file") for $_file" + + mkdir -p "$orig_dir" + cp -f "$file" "$orig_dir" + [[ -e "$cmd_file" ]] && cp -f "$cmd_file" "$orig_dir" + done + xtrace_restore + + mv -f "$TMP_DIR/build.log" "$ORIG_DIR" + touch "$TIMESTAMP" +} + +# Copy all changed objects to $PATCHED_DIR +copy_patched_objects() { + local files=() + local opts=() + local found=0 + + rm -rf "$PATCHED_DIR" + mkdir -p "$PATCHED_DIR" + + # Note this doesn't work with some configs, thus the 'cmp' below. + opts=("-newer") + opts+=("$TIMESTAMP") + + find_objects "${opts[@]}" | mapfile -t files + + xtrace_save "copying changed objects" + for _file in "${files[@]}"; do + local rel_file="${_file/.ko/.o}" + local file="$OBJ/$rel_file" + local orig_file="$ORIG_DIR/$rel_file" + local patched_file="$PATCHED_DIR/$rel_file" + local patched_dir="$(dirname "$patched_file")" + + [[ ! -f "$file" ]] && die "missing $(basename "$file") for $_file" + + cmp -s "$orig_file" "$file" && continue + + mkdir -p "$patched_dir" + cp -f "$file" "$patched_dir" + found=1 + done + xtrace_restore + + (( found == 0 )) && die "no changes detected" + + mv -f "$TMP_DIR/build.log" "$PATCHED_DIR" +} + +# Diff changed objects, writing output object to $DIFF_DIR +diff_objects() { + local log="$KLP_DIFF_LOG" + local files=() + + rm -rf "$DIFF_DIR" + mkdir -p "$DIFF_DIR" + + find "$PATCHED_DIR" -type f -name "*.o" | mapfile -t files + [[ ${#files[@]} -eq 0 ]] && die "no changes detected" + + # Diff all changed objects + for file in "${files[@]}"; do + local rel_file="${file#"$PATCHED_DIR"/}" + local orig_file="$rel_file" + local patched_file="$PATCHED_DIR/$rel_file" + local out_file="$DIFF_DIR/$rel_file" + local cmd=() + + mkdir -p "$(dirname "$out_file")" + + cmd=("$SRC/tools/objtool/objtool") + cmd+=("klp") + cmd+=("diff") + cmd+=("$orig_file") + cmd+=("$patched_file") + cmd+=("$out_file") + + ( + cd "$ORIG_DIR" + "${cmd[@]}" \ + 1> >(tee -a "$log") \ + 2> >(tee -a "$log" >&2) || \ + die "objtool klp diff failed" + ) + done +} + +# Build and post-process livepatch module in $KMOD_DIR +build_patch_module() { + local makefile="$KMOD_DIR/Kbuild" + local log="$KMOD_DIR/build.log" + local kmod_file + local cflags=() + local files=() + local cmd=() + + rm -rf "$KMOD_DIR" + mkdir -p "$KMOD_DIR" + + cp -f "$SRC/scripts/livepatch/init.c" "$KMOD_DIR" + + echo "obj-m := $NAME.o" > "$makefile" + echo -n "$NAME-y := init.o" >> "$makefile" + + find "$DIFF_DIR" -type f -name "*.o" | mapfile -t files + [[ ${#files[@]} -eq 0 ]] && die "no changes detected" + + for file in "${files[@]}"; do + local rel_file="${file#"$DIFF_DIR"/}" + local orig_file="$ORIG_DIR/$rel_file" + local orig_dir="$(dirname "$orig_file")" + local kmod_file="$KMOD_DIR/$rel_file" + local kmod_dir="$(dirname "$kmod_file")" + local cmd_file="$orig_dir/.$(basename "$file").cmd" + + mkdir -p "$kmod_dir" + cp -f "$file" "$kmod_dir" + [[ -e "$cmd_file" ]] && cp -f "$cmd_file" "$kmod_dir" + + # Tell kbuild this is a prebuilt object + cp -f "$file" "${kmod_file}_shipped" + + echo -n " $rel_file" >> "$makefile" + done + + echo >> "$makefile" + + cflags=("-ffunction-sections") + cflags+=("-fdata-sections") + [[ $REPLACE -eq 0 ]] && cflags+=("-DKLP_NO_REPLACE") + + cmd=("make") + cmd+=("$VERBOSE") + cmd+=("-j$JOBS") + cmd+=("--directory=.") + cmd+=("M=$KMOD_DIR") + cmd+=("KCFLAGS=${cflags[*]}") + + # Build a "normal" kernel module with init.c and the diffed objects + ( + cd "$SRC" + "${cmd[@]}" \ + 1> >(tee -a "$log") \ + 2> >(tee -a "$log" >&2) + ) + + kmod_file="$KMOD_DIR/$NAME.ko" + + # Save off the intermediate binary for debugging + cp -f "$kmod_file" "$kmod_file.orig" + + # Work around issue where slight .config change makes corrupt BTF + objcopy --remove-section=.BTF "$kmod_file" + + # Fix (and work around) linker wreckage for klp syms / relocs + "$SRC/tools/objtool/objtool" klp post-link "$kmod_file" || die "objtool klp post-link failed" + + cp -f "$kmod_file" "$OUTFILE" +} + + +################################################################################ + +process_args "$@" +do_init + +if (( SHORT_CIRCUIT <= 1 )); then + status "Validating patch(es)" + validate_patches + status "Building original kernel" + clean_kernel + build_kernel + status "Copying original object files" + copy_orig_objects +fi + +if (( SHORT_CIRCUIT <= 2 )); then + status "Fixing patch(es)" + fix_patches + apply_patches + status "Building patched kernel" + build_kernel + revert_patches + status "Copying patched object files" + copy_patched_objects +fi + +if (( SHORT_CIRCUIT <= 3 )); then + status "Diffing objects" + diff_objects +fi + +if (( SHORT_CIRCUIT <= 4 )); then + status "Building patch module: $OUTFILE" + build_patch_module +fi + +status "SUCCESS" diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c index 817d44394a78..4d1f9e9977eb 100644 --- a/tools/objtool/klp-diff.c +++ b/tools/objtool/klp-diff.c @@ -241,10 +241,12 @@ static struct symbol *next_file_symbol(struct elf *elf, struct symbol *sym) static bool is_uncorrelated_static_local(struct symbol *sym) { static const char * const vars[] = { - "__key.", - "__warned.", "__already_done.", "__func__.", + "__key.", + "__warned.", + "_entry.", + "_entry_ptr.", "_rs.", "descriptor.", "CSWTCH.", -- cgit v1.2.3 From 2c2f0b8626917c48e4b12827d296a3c654612b90 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:09 -0700 Subject: livepatch/klp-build: Add --debug option to show cloning decisions Add a --debug option which gets passed to "objtool klp diff" to enable debug output related to cloning decisions. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/livepatch/klp-build | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build index 01ed0b66bfaf..28ee259ce5f6 100755 --- a/scripts/livepatch/klp-build +++ b/scripts/livepatch/klp-build @@ -20,7 +20,7 @@ set -o nounset # This helps keep execution in pipes so pipefail+errexit can catch errors. shopt -s lastpipe -unset SKIP_CLEANUP XTRACE +unset DEBUG_CLONE SKIP_CLEANUP XTRACE REPLACE=1 SHORT_CIRCUIT=0 @@ -120,6 +120,7 @@ Options: -v, --verbose Pass V=1 to kernel/module builds Advanced Options: + -d, --debug Show symbol/reloc cloning decisions -S, --short-circuit=STEP Start at build step (requires prior --keep-tmp) 1|orig Build original kernel (default) 2|patched Build patched kernel @@ -140,8 +141,8 @@ process_args() { local long local args - short="hj:o:vS:T" - long="help,jobs:,output:,no-replace,verbose,short-circuit:,keep-tmp" + short="hj:o:vdS:T" + long="help,jobs:,output:,no-replace,verbose,debug,short-circuit:,keep-tmp" args=$(getopt --options "$short" --longoptions "$long" -- "$@") || { echo; usage; exit @@ -174,6 +175,11 @@ process_args() { VERBOSE="V=1" shift ;; + -d | --debug) + DEBUG_CLONE=1 + keep_tmp=1 + shift + ;; -S | --short-circuit) [[ ! -d "$TMP_DIR" ]] && die "--short-circuit requires preserved klp-tmp dir" keep_tmp=1 @@ -596,6 +602,7 @@ copy_patched_objects() { diff_objects() { local log="$KLP_DIFF_LOG" local files=() + local opts=() rm -rf "$DIFF_DIR" mkdir -p "$DIFF_DIR" @@ -603,6 +610,8 @@ diff_objects() { find "$PATCHED_DIR" -type f -name "*.o" | mapfile -t files [[ ${#files[@]} -eq 0 ]] && die "no changes detected" + [[ -v DEBUG_CLONE ]] && opts=("--debug") + # Diff all changed objects for file in "${files[@]}"; do local rel_file="${file#"$PATCHED_DIR"/}" @@ -616,6 +625,7 @@ diff_objects() { cmd=("$SRC/tools/objtool/objtool") cmd+=("klp") cmd+=("diff") + (( ${#opts[@]} > 0 )) && cmd+=("${opts[@]}") cmd+=("$orig_file") cmd+=("$patched_file") cmd+=("$out_file") -- cgit v1.2.3 From 78be9facfb5e711e5284ef1856401ea909eceeb2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:10 -0700 Subject: livepatch/klp-build: Add --show-first-changed option to show function divergence Add a --show-first-changed option to identify where changed functions begin to diverge: - Parse 'objtool klp diff' output to find changed functions. - Run objtool again on each object with --debug-checksum=. - Diff the per-instruction checksum debug output to locate the first differing instruction. This can be useful for quickly determining where and why a function changed. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/livepatch/klp-build | 82 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 4 deletions(-) (limited to 'scripts') diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build index 28ee259ce5f6..881e052e7fae 100755 --- a/scripts/livepatch/klp-build +++ b/scripts/livepatch/klp-build @@ -20,7 +20,7 @@ set -o nounset # This helps keep execution in pipes so pipefail+errexit can catch errors. shopt -s lastpipe -unset DEBUG_CLONE SKIP_CLEANUP XTRACE +unset DEBUG_CLONE DIFF_CHECKSUM SKIP_CLEANUP XTRACE REPLACE=1 SHORT_CIRCUIT=0 @@ -114,6 +114,7 @@ Usage: $SCRIPT [OPTIONS] PATCH_FILE(s) Generate a livepatch module. Options: + -f, --show-first-changed Show address of first changed instruction -j, --jobs= Build jobs to run simultaneously [default: $JOBS] -o, --output= Output file [default: livepatch-.ko] --no-replace Disable livepatch atomic replace @@ -141,8 +142,8 @@ process_args() { local long local args - short="hj:o:vdS:T" - long="help,jobs:,output:,no-replace,verbose,debug,short-circuit:,keep-tmp" + short="hfj:o:vdS:T" + long="help,show-first-changed,jobs:,output:,no-replace,verbose,debug,short-circuit:,keep-tmp" args=$(getopt --options "$short" --longoptions "$long" -- "$@") || { echo; usage; exit @@ -155,6 +156,10 @@ process_args() { usage exit 0 ;; + -f | --show-first-changed) + DIFF_CHECKSUM=1 + shift + ;; -j | --jobs) JOBS="$2" shift 2 @@ -618,6 +623,7 @@ diff_objects() { local orig_file="$rel_file" local patched_file="$PATCHED_DIR/$rel_file" local out_file="$DIFF_DIR/$rel_file" + local filter=() local cmd=() mkdir -p "$(dirname "$out_file")" @@ -630,16 +636,80 @@ diff_objects() { cmd+=("$patched_file") cmd+=("$out_file") + if [[ -v DIFF_CHECKSUM ]]; then + filter=("grep0") + filter+=("-Ev") + filter+=("DEBUG: .*checksum: ") + else + filter=("cat") + fi + ( cd "$ORIG_DIR" "${cmd[@]}" \ 1> >(tee -a "$log") \ - 2> >(tee -a "$log" >&2) || \ + 2> >(tee -a "$log" | "${filter[@]}" >&2) || \ die "objtool klp diff failed" ) done } +# For each changed object, run objtool with --debug-checksum to get the +# per-instruction checksums, and then diff those to find the first changed +# instruction for each function. +diff_checksums() { + local orig_log="$ORIG_DIR/checksum.log" + local patched_log="$PATCHED_DIR/checksum.log" + local -A funcs + local cmd=() + local line + local file + local func + + gawk '/\.o: changed function: / { + sub(/:$/, "", $1) + print $1, $NF + }' "$KLP_DIFF_LOG" | mapfile -t lines + + for line in "${lines[@]}"; do + read -r file func <<< "$line" + if [[ ! -v funcs["$file"] ]]; then + funcs["$file"]="$func" + else + funcs["$file"]+=" $func" + fi + done + + cmd=("$SRC/tools/objtool/objtool") + cmd+=("--checksum") + cmd+=("--link") + cmd+=("--dry-run") + + for file in "${!funcs[@]}"; do + local opt="--debug-checksum=${funcs[$file]// /,}" + + ( + cd "$ORIG_DIR" + "${cmd[@]}" "$opt" "$file" &> "$orig_log" || \ + ( cat "$orig_log" >&2; die "objtool --debug-checksum failed" ) + + cd "$PATCHED_DIR" + "${cmd[@]}" "$opt" "$file" &> "$patched_log" || \ + ( cat "$patched_log" >&2; die "objtool --debug-checksum failed" ) + ) + + for func in ${funcs[$file]}; do + diff <( grep0 -E "^DEBUG: .*checksum: $func " "$orig_log" | sed "s|$ORIG_DIR/||") \ + <( grep0 -E "^DEBUG: .*checksum: $func " "$patched_log" | sed "s|$PATCHED_DIR/||") \ + | gawk '/^< DEBUG: / { + gsub(/:/, "") + printf "%s: %s: %s\n", $3, $5, $6 + exit + }' || true + done + done +} + # Build and post-process livepatch module in $KMOD_DIR build_patch_module() { local makefile="$KMOD_DIR/Kbuild" @@ -743,6 +813,10 @@ fi if (( SHORT_CIRCUIT <= 3 )); then status "Diffing objects" diff_objects + if [[ -v DIFF_CHECKSUM ]]; then + status "Finding first changed instructions" + diff_checksums + fi fi if (( SHORT_CIRCUIT <= 4 )); then -- cgit v1.2.3 From 44472d1b83127e579c798ff92a07ae86d98b61b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Oct 2025 13:07:32 +0200 Subject: atomic: Skip alignment check for try_cmpxchg() old arg The 'old' argument in atomic_try_cmpxchg() and related functions is a pointer to a normal non-atomic integer number, which does not require to be naturally aligned, unlike the atomic_t/atomic64_t types themselves. In order to add an alignment check with CONFIG_DEBUG_ATOMIC into the normal instrument_atomic_read_write() helper, change this check to use the non-atomic instrument_read_write(), the same way that was done earlier for try_cmpxchg() in commit ec570320b09f ("locking/atomic: Correct (cmp)xchg() instrumentation"). This prevents warnings on m68k calling the 32-bit atomic_try_cmpxchg() with 16-bit aligned arguments as well as several more architectures including x86-32 when calling atomic64_try_cmpxchg() with 32-bit aligned u64 arguments. Reported-by: Finn Thain Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/cover.1757810729.git.fthain@linux-m68k.org/ --- include/linux/atomic/atomic-instrumented.h | 26 +++++++++++++------------- scripts/atomic/gen-atomic-instrumented.sh | 11 +++++++---- 2 files changed, 20 insertions(+), 17 deletions(-) (limited to 'scripts') diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index 9409a6ddf3e0..37ab6314a9f7 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -1276,7 +1276,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_try_cmpxchg(v, old, new); } @@ -1298,7 +1298,7 @@ static __always_inline bool atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_try_cmpxchg_acquire(v, old, new); } @@ -1321,7 +1321,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_try_cmpxchg_release(v, old, new); } @@ -1343,7 +1343,7 @@ static __always_inline bool atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_try_cmpxchg_relaxed(v, old, new); } @@ -2854,7 +2854,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic64_try_cmpxchg(v, old, new); } @@ -2876,7 +2876,7 @@ static __always_inline bool atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic64_try_cmpxchg_acquire(v, old, new); } @@ -2899,7 +2899,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic64_try_cmpxchg_release(v, old, new); } @@ -2921,7 +2921,7 @@ static __always_inline bool atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic64_try_cmpxchg_relaxed(v, old, new); } @@ -4432,7 +4432,7 @@ atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg(v, old, new); } @@ -4454,7 +4454,7 @@ static __always_inline bool atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg_acquire(v, old, new); } @@ -4477,7 +4477,7 @@ atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg_release(v, old, new); } @@ -4499,7 +4499,7 @@ static __always_inline bool atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg_relaxed(v, old, new); } @@ -5050,4 +5050,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 8829b337928e9508259079d32581775ececd415b +// f618ac667f868941a84ce0ab2242f1786e049ed4 diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh index 592f3ec89b5f..9c1d53f81eb2 100755 --- a/scripts/atomic/gen-atomic-instrumented.sh +++ b/scripts/atomic/gen-atomic-instrumented.sh @@ -12,7 +12,7 @@ gen_param_check() local arg="$1"; shift local type="${arg%%:*}" local name="$(gen_param_name "${arg}")" - local rw="write" + local rw="atomic_write" case "${type#c}" in i) return;; @@ -20,14 +20,17 @@ gen_param_check() if [ ${type#c} != ${type} ]; then # We don't write to constant parameters. - rw="read" + rw="atomic_read" + elif [ "${type}" = "p" ] ; then + # The "old" argument in try_cmpxchg() gets accessed non-atomically + rw="read_write" elif [ "${meta}" != "s" ]; then # An atomic RMW: if this parameter is not a constant, and this atomic is # not just a 's'tore, this parameter is both read from and written to. - rw="read_write" + rw="atomic_read_write" fi - printf "\tinstrument_atomic_${rw}(${name}, sizeof(*${name}));\n" + printf "\tinstrument_${rw}(${name}, sizeof(*${name}));\n" } #gen_params_checks(meta, arg...) -- cgit v1.2.3 From b055f4c431e3d0e0508b7541d7c3fa2f9cd2e0ab Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 21 Oct 2025 20:43:39 -0400 Subject: sorttable: Move ELF parsing into scripts/elf-parse.[ch] In order to share the elf parsing that is in sorttable.c so that other programs could use the same code, move it into elf-parse.c and elf-parse.h. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Nick Desaulniers Cc: Catalin Marinas Cc: Linus Torvalds Cc: Randy Dunlap Cc: Stephen Rothwell Link: https://lore.kernel.org/20251022004452.752298788@kernel.org Signed-off-by: Steven Rostedt (Google) --- scripts/Makefile | 3 + scripts/elf-parse.c | 198 ++++++++++++++++++++++ scripts/elf-parse.h | 305 +++++++++++++++++++++++++++++++++ scripts/sorttable.c | 477 ++++------------------------------------------------ 4 files changed, 540 insertions(+), 443 deletions(-) create mode 100644 scripts/elf-parse.c create mode 100644 scripts/elf-parse.h (limited to 'scripts') diff --git a/scripts/Makefile b/scripts/Makefile index 46f860529df5..f19624b3ed92 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -12,6 +12,8 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_builder hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_gen +sorttable-objs := sorttable.o elf-parse.o + ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),) always-$(CONFIG_RUST) += target.json filechk_rust_target = $< < include/config/auto.conf @@ -25,6 +27,7 @@ generate_rust_target-rust := y rustdoc_test_builder-rust := y rustdoc_test_gen-rust := y +HOSTCFLAGS_elf-parse.o = -I$(srctree)/tools/include HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTLDLIBS_sorttable = -lpthread HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include diff --git a/scripts/elf-parse.c b/scripts/elf-parse.c new file mode 100644 index 000000000000..99869ff91a8c --- /dev/null +++ b/scripts/elf-parse.c @@ -0,0 +1,198 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "elf-parse.h" + +struct elf_funcs elf_parser; + +/* + * Get the whole file as a programming convenience in order to avoid + * malloc+lseek+read+free of many pieces. If successful, then mmap + * avoids copying unused pieces; else just read the whole file. + * Open for both read and write. + */ +static void *map_file(char const *fname, size_t *size) +{ + int fd; + struct stat sb; + void *addr = NULL; + + fd = open(fname, O_RDWR); + if (fd < 0) { + perror(fname); + return NULL; + } + if (fstat(fd, &sb) < 0) { + perror(fname); + goto out; + } + if (!S_ISREG(sb.st_mode)) { + fprintf(stderr, "not a regular file: %s\n", fname); + goto out; + } + + addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + fprintf(stderr, "Could not mmap file: %s\n", fname); + goto out; + } + + *size = sb.st_size; + +out: + close(fd); + return addr; +} + +static int elf_parse(const char *fname, void *addr, uint32_t types) +{ + Elf_Ehdr *ehdr = addr; + uint16_t type; + + switch (ehdr->e32.e_ident[EI_DATA]) { + case ELFDATA2LSB: + elf_parser.r = rle; + elf_parser.r2 = r2le; + elf_parser.r8 = r8le; + elf_parser.w = wle; + elf_parser.w8 = w8le; + break; + case ELFDATA2MSB: + elf_parser.r = rbe; + elf_parser.r2 = r2be; + elf_parser.r8 = r8be; + elf_parser.w = wbe; + elf_parser.w8 = w8be; + break; + default: + fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", + ehdr->e32.e_ident[EI_DATA], fname); + return -1; + } + + if (memcmp(ELFMAG, ehdr->e32.e_ident, SELFMAG) != 0 || + ehdr->e32.e_ident[EI_VERSION] != EV_CURRENT) { + fprintf(stderr, "unrecognized ELF file %s\n", fname); + return -1; + } + + type = elf_parser.r2(&ehdr->e32.e_type); + if (!((1 << type) & types)) { + fprintf(stderr, "Invalid ELF type file %s\n", fname); + return -1; + } + + switch (ehdr->e32.e_ident[EI_CLASS]) { + case ELFCLASS32: { + elf_parser.ehdr_shoff = ehdr32_shoff; + elf_parser.ehdr_shentsize = ehdr32_shentsize; + elf_parser.ehdr_shstrndx = ehdr32_shstrndx; + elf_parser.ehdr_shnum = ehdr32_shnum; + elf_parser.shdr_addr = shdr32_addr; + elf_parser.shdr_offset = shdr32_offset; + elf_parser.shdr_link = shdr32_link; + elf_parser.shdr_size = shdr32_size; + elf_parser.shdr_name = shdr32_name; + elf_parser.shdr_type = shdr32_type; + elf_parser.shdr_entsize = shdr32_entsize; + elf_parser.sym_type = sym32_type; + elf_parser.sym_name = sym32_name; + elf_parser.sym_value = sym32_value; + elf_parser.sym_shndx = sym32_shndx; + elf_parser.rela_offset = rela32_offset; + elf_parser.rela_info = rela32_info; + elf_parser.rela_addend = rela32_addend; + elf_parser.rela_write_addend = rela32_write_addend; + + if (elf_parser.r2(&ehdr->e32.e_ehsize) != sizeof(Elf32_Ehdr) || + elf_parser.r2(&ehdr->e32.e_shentsize) != sizeof(Elf32_Shdr)) { + fprintf(stderr, + "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); + return -1; + } + + } + break; + case ELFCLASS64: { + elf_parser.ehdr_shoff = ehdr64_shoff; + elf_parser.ehdr_shentsize = ehdr64_shentsize; + elf_parser.ehdr_shstrndx = ehdr64_shstrndx; + elf_parser.ehdr_shnum = ehdr64_shnum; + elf_parser.shdr_addr = shdr64_addr; + elf_parser.shdr_offset = shdr64_offset; + elf_parser.shdr_link = shdr64_link; + elf_parser.shdr_size = shdr64_size; + elf_parser.shdr_name = shdr64_name; + elf_parser.shdr_type = shdr64_type; + elf_parser.shdr_entsize = shdr64_entsize; + elf_parser.sym_type = sym64_type; + elf_parser.sym_name = sym64_name; + elf_parser.sym_value = sym64_value; + elf_parser.sym_shndx = sym64_shndx; + elf_parser.rela_offset = rela64_offset; + elf_parser.rela_info = rela64_info; + elf_parser.rela_addend = rela64_addend; + elf_parser.rela_write_addend = rela64_write_addend; + + if (elf_parser.r2(&ehdr->e64.e_ehsize) != sizeof(Elf64_Ehdr) || + elf_parser.r2(&ehdr->e64.e_shentsize) != sizeof(Elf64_Shdr)) { + fprintf(stderr, + "unrecognized ET_EXEC/ET_DYN file: %s\n", + fname); + return -1; + } + + } + break; + default: + fprintf(stderr, "unrecognized ELF class %d %s\n", + ehdr->e32.e_ident[EI_CLASS], fname); + return -1; + } + return 0; +} + +int elf_map_machine(void *addr) +{ + Elf_Ehdr *ehdr = addr; + + return elf_parser.r2(&ehdr->e32.e_machine); +} + +int elf_map_long_size(void *addr) +{ + Elf_Ehdr *ehdr = addr; + + return ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32 ? 4 : 8; +} + +void *elf_map(char const *fname, size_t *size, uint32_t types) +{ + void *addr; + int ret; + + addr = map_file(fname, size); + if (!addr) + return NULL; + + ret = elf_parse(fname, addr, types); + if (ret < 0) { + elf_unmap(addr, *size); + return NULL; + } + + return addr; +} + +void elf_unmap(void *addr, size_t size) +{ + munmap(addr, size); +} diff --git a/scripts/elf-parse.h b/scripts/elf-parse.h new file mode 100644 index 000000000000..f4411e03069d --- /dev/null +++ b/scripts/elf-parse.h @@ -0,0 +1,305 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _SCRIPTS_ELF_PARSE_H +#define _SCRIPTS_ELF_PARSE_H + +#include + +#include +#include + +typedef union { + Elf32_Ehdr e32; + Elf64_Ehdr e64; +} Elf_Ehdr; + +typedef union { + Elf32_Shdr e32; + Elf64_Shdr e64; +} Elf_Shdr; + +typedef union { + Elf32_Sym e32; + Elf64_Sym e64; +} Elf_Sym; + +typedef union { + Elf32_Rela e32; + Elf64_Rela e64; +} Elf_Rela; + +struct elf_funcs { + int (*compare_extable)(const void *a, const void *b); + uint64_t (*ehdr_shoff)(Elf_Ehdr *ehdr); + uint16_t (*ehdr_shstrndx)(Elf_Ehdr *ehdr); + uint16_t (*ehdr_shentsize)(Elf_Ehdr *ehdr); + uint16_t (*ehdr_shnum)(Elf_Ehdr *ehdr); + uint64_t (*shdr_addr)(Elf_Shdr *shdr); + uint64_t (*shdr_offset)(Elf_Shdr *shdr); + uint64_t (*shdr_size)(Elf_Shdr *shdr); + uint64_t (*shdr_entsize)(Elf_Shdr *shdr); + uint32_t (*shdr_link)(Elf_Shdr *shdr); + uint32_t (*shdr_name)(Elf_Shdr *shdr); + uint32_t (*shdr_type)(Elf_Shdr *shdr); + uint8_t (*sym_type)(Elf_Sym *sym); + uint32_t (*sym_name)(Elf_Sym *sym); + uint64_t (*sym_value)(Elf_Sym *sym); + uint16_t (*sym_shndx)(Elf_Sym *sym); + uint64_t (*rela_offset)(Elf_Rela *rela); + uint64_t (*rela_info)(Elf_Rela *rela); + uint64_t (*rela_addend)(Elf_Rela *rela); + void (*rela_write_addend)(Elf_Rela *rela, uint64_t val); + uint32_t (*r)(const uint32_t *); + uint16_t (*r2)(const uint16_t *); + uint64_t (*r8)(const uint64_t *); + void (*w)(uint32_t, uint32_t *); + void (*w8)(uint64_t, uint64_t *); +}; + +extern struct elf_funcs elf_parser; + +static inline uint64_t ehdr64_shoff(Elf_Ehdr *ehdr) +{ + return elf_parser.r8(&ehdr->e64.e_shoff); +} + +static inline uint64_t ehdr32_shoff(Elf_Ehdr *ehdr) +{ + return elf_parser.r(&ehdr->e32.e_shoff); +} + +static inline uint64_t ehdr_shoff(Elf_Ehdr *ehdr) +{ + return elf_parser.ehdr_shoff(ehdr); +} + +#define EHDR_HALF(fn_name) \ +static inline uint16_t ehdr64_##fn_name(Elf_Ehdr *ehdr) \ +{ \ + return elf_parser.r2(&ehdr->e64.e_##fn_name); \ +} \ + \ +static inline uint16_t ehdr32_##fn_name(Elf_Ehdr *ehdr) \ +{ \ + return elf_parser.r2(&ehdr->e32.e_##fn_name); \ +} \ + \ +static inline uint16_t ehdr_##fn_name(Elf_Ehdr *ehdr) \ +{ \ + return elf_parser.ehdr_##fn_name(ehdr); \ +} + +EHDR_HALF(shentsize) +EHDR_HALF(shstrndx) +EHDR_HALF(shnum) + +#define SHDR_WORD(fn_name) \ +static inline uint32_t shdr64_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e64.sh_##fn_name); \ +} \ + \ +static inline uint32_t shdr32_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e32.sh_##fn_name); \ +} \ + \ +static inline uint32_t shdr_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.shdr_##fn_name(shdr); \ +} + +#define SHDR_ADDR(fn_name) \ +static inline uint64_t shdr64_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r8(&shdr->e64.sh_##fn_name); \ +} \ + \ +static inline uint64_t shdr32_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e32.sh_##fn_name); \ +} \ + \ +static inline uint64_t shdr_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.shdr_##fn_name(shdr); \ +} + +#define SHDR_WORD(fn_name) \ +static inline uint32_t shdr64_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e64.sh_##fn_name); \ +} \ + \ +static inline uint32_t shdr32_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e32.sh_##fn_name); \ +} \ +static inline uint32_t shdr_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.shdr_##fn_name(shdr); \ +} + +SHDR_ADDR(addr) +SHDR_ADDR(offset) +SHDR_ADDR(size) +SHDR_ADDR(entsize) + +SHDR_WORD(link) +SHDR_WORD(name) +SHDR_WORD(type) + +#define SYM_ADDR(fn_name) \ +static inline uint64_t sym64_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r8(&sym->e64.st_##fn_name); \ +} \ + \ +static inline uint64_t sym32_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r(&sym->e32.st_##fn_name); \ +} \ + \ +static inline uint64_t sym_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.sym_##fn_name(sym); \ +} + +#define SYM_WORD(fn_name) \ +static inline uint32_t sym64_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r(&sym->e64.st_##fn_name); \ +} \ + \ +static inline uint32_t sym32_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r(&sym->e32.st_##fn_name); \ +} \ + \ +static inline uint32_t sym_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.sym_##fn_name(sym); \ +} + +#define SYM_HALF(fn_name) \ +static inline uint16_t sym64_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r2(&sym->e64.st_##fn_name); \ +} \ + \ +static inline uint16_t sym32_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r2(&sym->e32.st_##fn_name); \ +} \ + \ +static inline uint16_t sym_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.sym_##fn_name(sym); \ +} + +static inline uint8_t sym64_type(Elf_Sym *sym) +{ + return ELF64_ST_TYPE(sym->e64.st_info); +} + +static inline uint8_t sym32_type(Elf_Sym *sym) +{ + return ELF32_ST_TYPE(sym->e32.st_info); +} + +static inline uint8_t sym_type(Elf_Sym *sym) +{ + return elf_parser.sym_type(sym); +} + +SYM_ADDR(value) +SYM_WORD(name) +SYM_HALF(shndx) + +#define __maybe_unused __attribute__((__unused__)) + +#define RELA_ADDR(fn_name) \ +static inline uint64_t rela64_##fn_name(Elf_Rela *rela) \ +{ \ + return elf_parser.r8((uint64_t *)&rela->e64.r_##fn_name); \ +} \ + \ +static inline uint64_t rela32_##fn_name(Elf_Rela *rela) \ +{ \ + return elf_parser.r((uint32_t *)&rela->e32.r_##fn_name); \ +} \ + \ +static inline uint64_t __maybe_unused rela_##fn_name(Elf_Rela *rela) \ +{ \ + return elf_parser.rela_##fn_name(rela); \ +} + +RELA_ADDR(offset) +RELA_ADDR(info) +RELA_ADDR(addend) + +static inline void rela64_write_addend(Elf_Rela *rela, uint64_t val) +{ + elf_parser.w8(val, (uint64_t *)&rela->e64.r_addend); +} + +static inline void rela32_write_addend(Elf_Rela *rela, uint64_t val) +{ + elf_parser.w(val, (uint32_t *)&rela->e32.r_addend); +} + +static inline uint32_t rbe(const uint32_t *x) +{ + return get_unaligned_be32(x); +} + +static inline uint16_t r2be(const uint16_t *x) +{ + return get_unaligned_be16(x); +} + +static inline uint64_t r8be(const uint64_t *x) +{ + return get_unaligned_be64(x); +} + +static inline uint32_t rle(const uint32_t *x) +{ + return get_unaligned_le32(x); +} + +static inline uint16_t r2le(const uint16_t *x) +{ + return get_unaligned_le16(x); +} + +static inline uint64_t r8le(const uint64_t *x) +{ + return get_unaligned_le64(x); +} + +static inline void wbe(uint32_t val, uint32_t *x) +{ + put_unaligned_be32(val, x); +} + +static inline void wle(uint32_t val, uint32_t *x) +{ + put_unaligned_le32(val, x); +} + +static inline void w8be(uint64_t val, uint64_t *x) +{ + put_unaligned_be64(val, x); +} + +static inline void w8le(uint64_t val, uint64_t *x) +{ + put_unaligned_le64(val, x); +} + +void *elf_map(char const *fname, size_t *size, uint32_t types); +void elf_unmap(void *addr, size_t size); +int elf_map_machine(void *addr); +int elf_map_long_size(void *addr); + +#endif /* _SCRIPTS_ELF_PARSE_H */ diff --git a/scripts/sorttable.c b/scripts/sorttable.c index deed676bfe38..e8ed11c680c6 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -21,10 +21,8 @@ */ #include -#include #include #include -#include #include #include #include @@ -34,8 +32,7 @@ #include #include -#include -#include +#include "elf-parse.h" #ifndef EM_ARCOMPACT #define EM_ARCOMPACT 93 @@ -65,335 +62,8 @@ #define EM_LOONGARCH 258 #endif -typedef union { - Elf32_Ehdr e32; - Elf64_Ehdr e64; -} Elf_Ehdr; - -typedef union { - Elf32_Shdr e32; - Elf64_Shdr e64; -} Elf_Shdr; - -typedef union { - Elf32_Sym e32; - Elf64_Sym e64; -} Elf_Sym; - -typedef union { - Elf32_Rela e32; - Elf64_Rela e64; -} Elf_Rela; - -static uint32_t (*r)(const uint32_t *); -static uint16_t (*r2)(const uint16_t *); -static uint64_t (*r8)(const uint64_t *); -static void (*w)(uint32_t, uint32_t *); -static void (*w8)(uint64_t, uint64_t *); typedef void (*table_sort_t)(char *, int); -static struct elf_funcs { - int (*compare_extable)(const void *a, const void *b); - uint64_t (*ehdr_shoff)(Elf_Ehdr *ehdr); - uint16_t (*ehdr_shstrndx)(Elf_Ehdr *ehdr); - uint16_t (*ehdr_shentsize)(Elf_Ehdr *ehdr); - uint16_t (*ehdr_shnum)(Elf_Ehdr *ehdr); - uint64_t (*shdr_addr)(Elf_Shdr *shdr); - uint64_t (*shdr_offset)(Elf_Shdr *shdr); - uint64_t (*shdr_size)(Elf_Shdr *shdr); - uint64_t (*shdr_entsize)(Elf_Shdr *shdr); - uint32_t (*shdr_link)(Elf_Shdr *shdr); - uint32_t (*shdr_name)(Elf_Shdr *shdr); - uint32_t (*shdr_type)(Elf_Shdr *shdr); - uint8_t (*sym_type)(Elf_Sym *sym); - uint32_t (*sym_name)(Elf_Sym *sym); - uint64_t (*sym_value)(Elf_Sym *sym); - uint16_t (*sym_shndx)(Elf_Sym *sym); - uint64_t (*rela_offset)(Elf_Rela *rela); - uint64_t (*rela_info)(Elf_Rela *rela); - uint64_t (*rela_addend)(Elf_Rela *rela); - void (*rela_write_addend)(Elf_Rela *rela, uint64_t val); -} e; - -static uint64_t ehdr64_shoff(Elf_Ehdr *ehdr) -{ - return r8(&ehdr->e64.e_shoff); -} - -static uint64_t ehdr32_shoff(Elf_Ehdr *ehdr) -{ - return r(&ehdr->e32.e_shoff); -} - -static uint64_t ehdr_shoff(Elf_Ehdr *ehdr) -{ - return e.ehdr_shoff(ehdr); -} - -#define EHDR_HALF(fn_name) \ -static uint16_t ehdr64_##fn_name(Elf_Ehdr *ehdr) \ -{ \ - return r2(&ehdr->e64.e_##fn_name); \ -} \ - \ -static uint16_t ehdr32_##fn_name(Elf_Ehdr *ehdr) \ -{ \ - return r2(&ehdr->e32.e_##fn_name); \ -} \ - \ -static uint16_t ehdr_##fn_name(Elf_Ehdr *ehdr) \ -{ \ - return e.ehdr_##fn_name(ehdr); \ -} - -EHDR_HALF(shentsize) -EHDR_HALF(shstrndx) -EHDR_HALF(shnum) - -#define SHDR_WORD(fn_name) \ -static uint32_t shdr64_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e64.sh_##fn_name); \ -} \ - \ -static uint32_t shdr32_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e32.sh_##fn_name); \ -} \ - \ -static uint32_t shdr_##fn_name(Elf_Shdr *shdr) \ -{ \ - return e.shdr_##fn_name(shdr); \ -} - -#define SHDR_ADDR(fn_name) \ -static uint64_t shdr64_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r8(&shdr->e64.sh_##fn_name); \ -} \ - \ -static uint64_t shdr32_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e32.sh_##fn_name); \ -} \ - \ -static uint64_t shdr_##fn_name(Elf_Shdr *shdr) \ -{ \ - return e.shdr_##fn_name(shdr); \ -} - -#define SHDR_WORD(fn_name) \ -static uint32_t shdr64_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e64.sh_##fn_name); \ -} \ - \ -static uint32_t shdr32_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e32.sh_##fn_name); \ -} \ -static uint32_t shdr_##fn_name(Elf_Shdr *shdr) \ -{ \ - return e.shdr_##fn_name(shdr); \ -} - -SHDR_ADDR(addr) -SHDR_ADDR(offset) -SHDR_ADDR(size) -SHDR_ADDR(entsize) - -SHDR_WORD(link) -SHDR_WORD(name) -SHDR_WORD(type) - -#define SYM_ADDR(fn_name) \ -static uint64_t sym64_##fn_name(Elf_Sym *sym) \ -{ \ - return r8(&sym->e64.st_##fn_name); \ -} \ - \ -static uint64_t sym32_##fn_name(Elf_Sym *sym) \ -{ \ - return r(&sym->e32.st_##fn_name); \ -} \ - \ -static uint64_t sym_##fn_name(Elf_Sym *sym) \ -{ \ - return e.sym_##fn_name(sym); \ -} - -#define SYM_WORD(fn_name) \ -static uint32_t sym64_##fn_name(Elf_Sym *sym) \ -{ \ - return r(&sym->e64.st_##fn_name); \ -} \ - \ -static uint32_t sym32_##fn_name(Elf_Sym *sym) \ -{ \ - return r(&sym->e32.st_##fn_name); \ -} \ - \ -static uint32_t sym_##fn_name(Elf_Sym *sym) \ -{ \ - return e.sym_##fn_name(sym); \ -} - -#define SYM_HALF(fn_name) \ -static uint16_t sym64_##fn_name(Elf_Sym *sym) \ -{ \ - return r2(&sym->e64.st_##fn_name); \ -} \ - \ -static uint16_t sym32_##fn_name(Elf_Sym *sym) \ -{ \ - return r2(&sym->e32.st_##fn_name); \ -} \ - \ -static uint16_t sym_##fn_name(Elf_Sym *sym) \ -{ \ - return e.sym_##fn_name(sym); \ -} - -static uint8_t sym64_type(Elf_Sym *sym) -{ - return ELF64_ST_TYPE(sym->e64.st_info); -} - -static uint8_t sym32_type(Elf_Sym *sym) -{ - return ELF32_ST_TYPE(sym->e32.st_info); -} - -static uint8_t sym_type(Elf_Sym *sym) -{ - return e.sym_type(sym); -} - -SYM_ADDR(value) -SYM_WORD(name) -SYM_HALF(shndx) - -#define __maybe_unused __attribute__((__unused__)) - -#define RELA_ADDR(fn_name) \ -static uint64_t rela64_##fn_name(Elf_Rela *rela) \ -{ \ - return r8((uint64_t *)&rela->e64.r_##fn_name); \ -} \ - \ -static uint64_t rela32_##fn_name(Elf_Rela *rela) \ -{ \ - return r((uint32_t *)&rela->e32.r_##fn_name); \ -} \ - \ -static uint64_t __maybe_unused rela_##fn_name(Elf_Rela *rela) \ -{ \ - return e.rela_##fn_name(rela); \ -} - -RELA_ADDR(offset) -RELA_ADDR(info) -RELA_ADDR(addend) - -static void rela64_write_addend(Elf_Rela *rela, uint64_t val) -{ - w8(val, (uint64_t *)&rela->e64.r_addend); -} - -static void rela32_write_addend(Elf_Rela *rela, uint64_t val) -{ - w(val, (uint32_t *)&rela->e32.r_addend); -} - -/* - * Get the whole file as a programming convenience in order to avoid - * malloc+lseek+read+free of many pieces. If successful, then mmap - * avoids copying unused pieces; else just read the whole file. - * Open for both read and write. - */ -static void *mmap_file(char const *fname, size_t *size) -{ - int fd; - struct stat sb; - void *addr = NULL; - - fd = open(fname, O_RDWR); - if (fd < 0) { - perror(fname); - return NULL; - } - if (fstat(fd, &sb) < 0) { - perror(fname); - goto out; - } - if (!S_ISREG(sb.st_mode)) { - fprintf(stderr, "not a regular file: %s\n", fname); - goto out; - } - - addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (addr == MAP_FAILED) { - fprintf(stderr, "Could not mmap file: %s\n", fname); - goto out; - } - - *size = sb.st_size; - -out: - close(fd); - return addr; -} - -static uint32_t rbe(const uint32_t *x) -{ - return get_unaligned_be32(x); -} - -static uint16_t r2be(const uint16_t *x) -{ - return get_unaligned_be16(x); -} - -static uint64_t r8be(const uint64_t *x) -{ - return get_unaligned_be64(x); -} - -static uint32_t rle(const uint32_t *x) -{ - return get_unaligned_le32(x); -} - -static uint16_t r2le(const uint16_t *x) -{ - return get_unaligned_le16(x); -} - -static uint64_t r8le(const uint64_t *x) -{ - return get_unaligned_le64(x); -} - -static void wbe(uint32_t val, uint32_t *x) -{ - put_unaligned_be32(val, x); -} - -static void wle(uint32_t val, uint32_t *x) -{ - put_unaligned_le32(val, x); -} - -static void w8be(uint64_t val, uint64_t *x) -{ - put_unaligned_be64(val, x); -} - -static void w8le(uint64_t val, uint64_t *x) -{ - put_unaligned_le64(val, x); -} - /* * Move reserved section indices SHN_LORESERVE..SHN_HIRESERVE out of * the way to -256..-1, to avoid conflicting with real section @@ -415,13 +85,13 @@ static inline unsigned int get_secindex(unsigned int shndx, return SPECIAL(shndx); if (shndx != SHN_XINDEX) return shndx; - return r(&symtab_shndx_start[sym_offs]); + return elf_parser.r(&symtab_shndx_start[sym_offs]); } static int compare_extable_32(const void *a, const void *b) { - Elf32_Addr av = r(a); - Elf32_Addr bv = r(b); + Elf32_Addr av = elf_parser.r(a); + Elf32_Addr bv = elf_parser.r(b); if (av < bv) return -1; @@ -430,18 +100,15 @@ static int compare_extable_32(const void *a, const void *b) static int compare_extable_64(const void *a, const void *b) { - Elf64_Addr av = r8(a); - Elf64_Addr bv = r8(b); + Elf64_Addr av = elf_parser.r8(a); + Elf64_Addr bv = elf_parser.r8(b); if (av < bv) return -1; return av > bv; } -static int compare_extable(const void *a, const void *b) -{ - return e.compare_extable(a, b); -} +static int (*compare_extable)(const void *a, const void *b); static inline void *get_index(void *start, int entsize, int index) { @@ -577,7 +244,7 @@ static int (*compare_values)(const void *a, const void *b); /* Only used for sorting mcount table */ static void rela_write_addend(Elf_Rela *rela, uint64_t val) { - e.rela_write_addend(rela, val); + elf_parser.rela_write_addend(rela, val); } struct func_info { @@ -792,9 +459,9 @@ static int fill_addrs(void *ptr, uint64_t size, void *addrs) for (; ptr < end; ptr += long_size, addrs += long_size, count++) { if (long_size == 4) - *(uint32_t *)ptr = r(addrs); + *(uint32_t *)ptr = elf_parser.r(addrs); else - *(uint64_t *)ptr = r8(addrs); + *(uint64_t *)ptr = elf_parser.r8(addrs); } return count; } @@ -805,9 +472,9 @@ static void replace_addrs(void *ptr, uint64_t size, void *addrs) for (; ptr < end; ptr += long_size, addrs += long_size) { if (long_size == 4) - w(*(uint32_t *)ptr, addrs); + elf_parser.w(*(uint32_t *)ptr, addrs); else - w8(*(uint64_t *)ptr, addrs); + elf_parser.w8(*(uint64_t *)ptr, addrs); } } @@ -1111,7 +778,7 @@ static int do_sort(Elf_Ehdr *ehdr, sym_value(sort_needed_sym) - shdr_addr(sort_needed_sec); /* extable has been sorted, clear the flag */ - w(0, sort_needed_loc); + elf_parser.w(0, sort_needed_loc); rc = 0; out: @@ -1155,8 +822,8 @@ out: static int compare_relative_table(const void *a, const void *b) { - int32_t av = (int32_t)r(a); - int32_t bv = (int32_t)r(b); + int32_t av = (int32_t)elf_parser.r(a); + int32_t bv = (int32_t)elf_parser.r(b); if (av < bv) return -1; @@ -1175,7 +842,7 @@ static void sort_relative_table(char *extab_image, int image_size) */ while (i < image_size) { uint32_t *loc = (uint32_t *)(extab_image + i); - w(r(loc) + i, loc); + elf_parser.w(elf_parser.r(loc) + i, loc); i += 4; } @@ -1185,7 +852,7 @@ static void sort_relative_table(char *extab_image, int image_size) i = 0; while (i < image_size) { uint32_t *loc = (uint32_t *)(extab_image + i); - w(r(loc) - i, loc); + elf_parser.w(elf_parser.r(loc) - i, loc); i += 4; } } @@ -1197,8 +864,8 @@ static void sort_relative_table_with_data(char *extab_image, int image_size) while (i < image_size) { uint32_t *loc = (uint32_t *)(extab_image + i); - w(r(loc) + i, loc); - w(r(loc + 1) + i + 4, loc + 1); + elf_parser.w(elf_parser.r(loc) + i, loc); + elf_parser.w(elf_parser.r(loc + 1) + i + 4, loc + 1); /* Don't touch the fixup type or data */ i += sizeof(uint32_t) * 3; @@ -1210,8 +877,8 @@ static void sort_relative_table_with_data(char *extab_image, int image_size) while (i < image_size) { uint32_t *loc = (uint32_t *)(extab_image + i); - w(r(loc) - i, loc); - w(r(loc + 1) - (i + 4), loc + 1); + elf_parser.w(elf_parser.r(loc) - i, loc); + elf_parser.w(elf_parser.r(loc + 1) - (i + 4), loc + 1); /* Don't touch the fixup type or data */ i += sizeof(uint32_t) * 3; @@ -1223,35 +890,7 @@ static int do_file(char const *const fname, void *addr) Elf_Ehdr *ehdr = addr; table_sort_t custom_sort = NULL; - switch (ehdr->e32.e_ident[EI_DATA]) { - case ELFDATA2LSB: - r = rle; - r2 = r2le; - r8 = r8le; - w = wle; - w8 = w8le; - break; - case ELFDATA2MSB: - r = rbe; - r2 = r2be; - r8 = r8be; - w = wbe; - w8 = w8be; - break; - default: - fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", - ehdr->e32.e_ident[EI_DATA], fname); - return -1; - } - - if (memcmp(ELFMAG, ehdr->e32.e_ident, SELFMAG) != 0 || - (r2(&ehdr->e32.e_type) != ET_EXEC && r2(&ehdr->e32.e_type) != ET_DYN) || - ehdr->e32.e_ident[EI_VERSION] != EV_CURRENT) { - fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file %s\n", fname); - return -1; - } - - switch (r2(&ehdr->e32.e_machine)) { + switch (elf_map_machine(ehdr)) { case EM_AARCH64: #ifdef MCOUNT_SORT_ENABLED sort_reloc = true; @@ -1281,85 +920,37 @@ static int do_file(char const *const fname, void *addr) break; default: fprintf(stderr, "unrecognized e_machine %d %s\n", - r2(&ehdr->e32.e_machine), fname); + elf_parser.r2(&ehdr->e32.e_machine), fname); return -1; } - switch (ehdr->e32.e_ident[EI_CLASS]) { - case ELFCLASS32: { - struct elf_funcs efuncs = { - .compare_extable = compare_extable_32, - .ehdr_shoff = ehdr32_shoff, - .ehdr_shentsize = ehdr32_shentsize, - .ehdr_shstrndx = ehdr32_shstrndx, - .ehdr_shnum = ehdr32_shnum, - .shdr_addr = shdr32_addr, - .shdr_offset = shdr32_offset, - .shdr_link = shdr32_link, - .shdr_size = shdr32_size, - .shdr_name = shdr32_name, - .shdr_type = shdr32_type, - .shdr_entsize = shdr32_entsize, - .sym_type = sym32_type, - .sym_name = sym32_name, - .sym_value = sym32_value, - .sym_shndx = sym32_shndx, - .rela_offset = rela32_offset, - .rela_info = rela32_info, - .rela_addend = rela32_addend, - .rela_write_addend = rela32_write_addend, - }; - - e = efuncs; + switch (elf_map_long_size(addr)) { + case 4: + compare_extable = compare_extable_32, long_size = 4; extable_ent_size = 8; - if (r2(&ehdr->e32.e_ehsize) != sizeof(Elf32_Ehdr) || - r2(&ehdr->e32.e_shentsize) != sizeof(Elf32_Shdr)) { + if (elf_parser.r2(&ehdr->e32.e_ehsize) != sizeof(Elf32_Ehdr) || + elf_parser.r2(&ehdr->e32.e_shentsize) != sizeof(Elf32_Shdr)) { fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); return -1; } - } break; - case ELFCLASS64: { - struct elf_funcs efuncs = { - .compare_extable = compare_extable_64, - .ehdr_shoff = ehdr64_shoff, - .ehdr_shentsize = ehdr64_shentsize, - .ehdr_shstrndx = ehdr64_shstrndx, - .ehdr_shnum = ehdr64_shnum, - .shdr_addr = shdr64_addr, - .shdr_offset = shdr64_offset, - .shdr_link = shdr64_link, - .shdr_size = shdr64_size, - .shdr_name = shdr64_name, - .shdr_type = shdr64_type, - .shdr_entsize = shdr64_entsize, - .sym_type = sym64_type, - .sym_name = sym64_name, - .sym_value = sym64_value, - .sym_shndx = sym64_shndx, - .rela_offset = rela64_offset, - .rela_info = rela64_info, - .rela_addend = rela64_addend, - .rela_write_addend = rela64_write_addend, - }; - - e = efuncs; + case 8: + compare_extable = compare_extable_64, long_size = 8; extable_ent_size = 16; - if (r2(&ehdr->e64.e_ehsize) != sizeof(Elf64_Ehdr) || - r2(&ehdr->e64.e_shentsize) != sizeof(Elf64_Shdr)) { + if (elf_parser.r2(&ehdr->e64.e_ehsize) != sizeof(Elf64_Ehdr) || + elf_parser.r2(&ehdr->e64.e_shentsize) != sizeof(Elf64_Shdr)) { fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); return -1; } - } break; default: fprintf(stderr, "unrecognized ELF class %d %s\n", @@ -1398,7 +989,7 @@ int main(int argc, char *argv[]) /* Process each file in turn, allowing deep failure. */ for (i = optind; i < argc; i++) { - addr = mmap_file(argv[i], &size); + addr = elf_map(argv[i], &size, (1 << ET_EXEC) | (1 << ET_DYN)); if (!addr) { ++n_error; continue; @@ -1407,7 +998,7 @@ int main(int argc, char *argv[]) if (do_file(argv[i], addr)) ++n_error; - munmap(addr, size); + elf_unmap(addr, size); } return !!n_error; -- cgit v1.2.3 From e30f8e61e2518a837837daa26cda3c8cc30f3226 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 21 Oct 2025 20:43:40 -0400 Subject: tracing: Add a tracepoint verification check at build time If a tracepoint is defined via DECLARE_TRACE() or TRACE_EVENT() but never called (via the trace_() function), its metadata is still around in memory and not discarded. When created via TRACE_EVENT() the situation is worse because the TRACE_EVENT() creates metadata that can be around 5k per trace event. Having unused trace events causes several thousand of wasted bytes. Add a verifier that injects a string of the name of the tracepoint it calls that is added to the discarded section "__tracepoint_check". For every builtin tracepoint, its name (which is saved in the in-memory section "__tracepoint_strings") will have its name also in the "__tracepoint_check" section if it is used. Add a new program that is run on build called tracepoint-update. This is executed on the vmlinux.o before the __tracepoint_check section is discarded (the section is discarded before vmlinux is created). This program will create an array of each string in the __tracepoint_check section and then sort it. Then it will walk the strings in the __tracepoint_strings section and do a binary search to check if its name is in the __tracepoint_check section. If it is not, then it is unused and a warning is printed. Note, this currently only handles tracepoints that are builtin and not in modules. Enabling this currently with a given config produces: warning: tracepoint 'sched_move_numa' is unused. warning: tracepoint 'sched_stick_numa' is unused. warning: tracepoint 'sched_swap_numa' is unused. warning: tracepoint 'pelt_hw_tp' is unused. warning: tracepoint 'pelt_irq_tp' is unused. warning: tracepoint 'rcu_preempt_task' is unused. warning: tracepoint 'rcu_unlock_preempted_task' is unused. warning: tracepoint 'xdp_bulk_tx' is unused. warning: tracepoint 'xdp_redirect_map' is unused. warning: tracepoint 'xdp_redirect_map_err' is unused. warning: tracepoint 'vma_mas_szero' is unused. warning: tracepoint 'vma_store' is unused. warning: tracepoint 'hugepage_set_pmd' is unused. warning: tracepoint 'hugepage_set_pud' is unused. warning: tracepoint 'hugepage_update_pmd' is unused. warning: tracepoint 'hugepage_update_pud' is unused. warning: tracepoint 'block_rq_remap' is unused. warning: tracepoint 'xhci_dbc_handle_event' is unused. warning: tracepoint 'xhci_dbc_handle_transfer' is unused. warning: tracepoint 'xhci_dbc_gadget_ep_queue' is unused. warning: tracepoint 'xhci_dbc_alloc_request' is unused. warning: tracepoint 'xhci_dbc_free_request' is unused. warning: tracepoint 'xhci_dbc_queue_request' is unused. warning: tracepoint 'xhci_dbc_giveback_request' is unused. warning: tracepoint 'tcp_ao_wrong_maclen' is unused. warning: tracepoint 'tcp_ao_mismatch' is unused. warning: tracepoint 'tcp_ao_key_not_found' is unused. warning: tracepoint 'tcp_ao_rnext_request' is unused. warning: tracepoint 'tcp_ao_synack_no_key' is unused. warning: tracepoint 'tcp_ao_snd_sne_update' is unused. warning: tracepoint 'tcp_ao_rcv_sne_update' is unused. Some of the above is totally unused but others are not used due to their "trace_" functions being inside configs, in which case, the defined tracepoints should also be inside those same configs. Others are architecture specific but defined in generic code, where they should either be moved to the architecture or be surrounded by #ifdef for the architectures they are for. This tool could be updated to process modules in the future. I'd like to thank Mathieu Desnoyers for suggesting using strings instead of pointers, as using pointers in vmlinux.o required handling relocations and it required implementing almost a full feature linker to do so. To enable this check, run the build with: make UT=1 Note, when all the existing unused tracepoints are removed from the build, the "UT=1" will be removed and this will always be enabled when tracepoints are configured to warn on any new tracepoints. The reason this isn't always enabled now is because it will introduce a lot of warnings for the current unused tracepoints, and all bisects would end at this commit for those warnings. Link: https://lore.kernel.org/all/20250528114549.4d8a5e03@gandalf.local.home/ Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Nick Desaulniers Cc: Catalin Marinas Cc: Linus Torvalds Cc: Randy Dunlap Cc: Stephen Rothwell Link: https://lore.kernel.org/20251022004452.920728129@kernel.org Suggested-by: Mathieu Desnoyers # for using strings instead of pointers Signed-off-by: Steven Rostedt (Google) --- Makefile | 21 ++++ include/asm-generic/vmlinux.lds.h | 1 + include/linux/tracepoint.h | 11 ++ scripts/Makefile | 3 + scripts/link-vmlinux.sh | 7 ++ scripts/tracepoint-update.c | 232 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 275 insertions(+) create mode 100644 scripts/tracepoint-update.c (limited to 'scripts') diff --git a/Makefile b/Makefile index d14824792227..9823c20c4278 100644 --- a/Makefile +++ b/Makefile @@ -810,6 +810,25 @@ ifdef CONFIG_FUNCTION_TRACER CC_FLAGS_FTRACE := -pg endif +ifdef CONFIG_TRACEPOINTS +# To check for unused tracepoints (tracepoints that are defined but never +# called), run with: +# +# make UT=1 +# +# Each unused tracepoints can take up to 5KB of memory in the running kernel. +# It is best to remove any that are not used. +# +# This command line option will be removed when all current unused +# tracepoints are removed. + +ifeq ("$(origin UT)", "command line") + WARN_ON_UNUSED_TRACEPOINTS := $(UT) +endif +endif # CONFIG_TRACEPOINTS + +export WARN_ON_UNUSED_TRACEPOINTS + include $(srctree)/arch/$(SRCARCH)/Makefile ifdef need-config @@ -1772,6 +1791,8 @@ help: @echo ' c: extra checks in the configuration stage (Kconfig)' @echo ' e: warnings are being treated as errors' @echo ' Multiple levels can be combined with W=12 or W=123' + @echo ' make UT=1 [targets] Warn if a tracepoint is defined but not used.' + @echo ' [ This will be removed when all current unused tracepoints are eliminated. ]' @$(if $(dtstree), \ echo ' make CHECK_DTBS=1 [targets] Check all generated dtb files against schema'; \ echo ' This can be applied both to "dtbs" and to individual "foo.dtb" targets' ; \ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a9a2e732a65..c510fb097a8c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1048,6 +1048,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) *(.no_trim_symbol) \ /* ld.bfd warns about .gnu.version* even when not emitted */ \ *(.gnu.version*) \ + *(__tracepoint_check) \ #define DISCARDS \ /DISCARD/ : { \ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 826ce3f8e1f8..1e53d3626c78 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -221,6 +221,15 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) __do_trace_##name(args); \ } +/* + * When a tracepoint is used, it's name is added to the __tracepoint_check + * section. This section is only used at build time to make sure all + * defined tracepoints are used. It is discarded after the build. + */ +# define TRACEPOINT_CHECK(name) \ + static const char __used __section("__tracepoint_check") __trace_check[] = \ + #name; + /* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the @@ -270,6 +279,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void __do_trace_##name(proto) \ { \ + TRACEPOINT_CHECK(name) \ if (cond) { \ guard(preempt_notrace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ @@ -289,6 +299,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void __do_trace_##name(proto) \ { \ + TRACEPOINT_CHECK(name) \ guard(rcu_tasks_trace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ } \ diff --git a/scripts/Makefile b/scripts/Makefile index f19624b3ed92..0941e5ce7b57 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -11,8 +11,10 @@ hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_builder hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_gen +hostprogs-always-$(CONFIG_TRACEPOINTS) += tracepoint-update sorttable-objs := sorttable.o elf-parse.o +tracepoint-update-objs := tracepoint-update.o elf-parse.o ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),) always-$(CONFIG_RUST) += target.json @@ -27,6 +29,7 @@ generate_rust_target-rust := y rustdoc_test_builder-rust := y rustdoc_test_gen-rust := y +HOSTCFLAGS_tracepoint-update.o = -I$(srctree)/tools/include HOSTCFLAGS_elf-parse.o = -I$(srctree)/tools/include HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTLDLIBS_sorttable = -lpthread diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 433849ff7529..d304029fa6da 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -208,6 +208,13 @@ kallsymso= strip_debug= generate_map= +# Use "make UT=1" to trigger warnings on unused tracepoints +case "${WARN_ON_UNUSED_TRACEPOINTS}" in +*1*) + ${objtree}/scripts/tracepoint-update vmlinux.o + ;; +esac + if is_enabled CONFIG_KALLSYMS; then true > .tmp_vmlinux0.syms kallsyms .tmp_vmlinux0.syms .tmp_vmlinux0.kallsyms diff --git a/scripts/tracepoint-update.c b/scripts/tracepoint-update.c new file mode 100644 index 000000000000..6ec30f39d0ad --- /dev/null +++ b/scripts/tracepoint-update.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "elf-parse.h" + +static Elf_Shdr *check_data_sec; +static Elf_Shdr *tracepoint_data_sec; + +static inline void *get_index(void *start, int entsize, int index) +{ + return start + (entsize * index); +} + +static int compare_strings(const void *a, const void *b) +{ + const char *av = *(const char **)a; + const char *bv = *(const char **)b; + + return strcmp(av, bv); +} + +struct elf_tracepoint { + Elf_Ehdr *ehdr; + const char **array; + int count; +}; + +#define REALLOC_SIZE (1 << 10) +#define REALLOC_MASK (REALLOC_SIZE - 1) + +static int add_string(const char *str, const char ***vals, int *count) +{ + const char **array = *vals; + + if (!(*count & REALLOC_MASK)) { + int size = (*count) + REALLOC_SIZE; + + array = realloc(array, sizeof(char *) * size); + if (!array) { + fprintf(stderr, "Failed memory allocation\n"); + return -1; + } + *vals = array; + } + + array[(*count)++] = str; + return 0; +} + +/** + * for_each_shdr_str - iterator that reads strings that are in an ELF section. + * @len: "int" to hold the length of the current string + * @ehdr: A pointer to the ehdr of the ELF file + * @sec: The section that has the strings to iterate on + * + * This is a for loop that iterates over all the nul terminated strings + * that are in a given ELF section. The variable "str" will hold + * the current string for each iteration and the passed in @len will + * contain the strlen() of that string. + */ +#define for_each_shdr_str(len, ehdr, sec) \ + for (const char *str = (void *)(ehdr) + shdr_offset(sec), \ + *end = str + shdr_size(sec); \ + len = strlen(str), str < end; \ + str += (len) + 1) + + +static void make_trace_array(struct elf_tracepoint *etrace) +{ + Elf_Ehdr *ehdr = etrace->ehdr; + const char **vals = NULL; + int count = 0; + int len; + + etrace->array = NULL; + + /* + * The __tracepoint_check section is filled with strings of the + * names of tracepoints (in tracepoint_strings). Create an array + * that points to each string and then sort the array. + */ + for_each_shdr_str(len, ehdr, check_data_sec) { + if (!len) + continue; + if (add_string(str, &vals, &count) < 0) + return; + } + + /* If CONFIG_TRACEPOINT_VERIFY_USED is not set, there's nothing to do */ + if (!count) + return; + + qsort(vals, count, sizeof(char *), compare_strings); + + etrace->array = vals; + etrace->count = count; +} + +static int find_event(const char *str, void *array, size_t size) +{ + return bsearch(&str, array, size, sizeof(char *), compare_strings) != NULL; +} + +static void check_tracepoints(struct elf_tracepoint *etrace) +{ + Elf_Ehdr *ehdr = etrace->ehdr; + int len; + + if (!etrace->array) + return; + + /* + * The __tracepoints_strings section holds all the names of the + * defined tracepoints. If any of them are not in the + * __tracepoint_check_section it means they are not used. + */ + for_each_shdr_str(len, ehdr, tracepoint_data_sec) { + if (!len) + continue; + if (!find_event(str, etrace->array, etrace->count)) { + fprintf(stderr, "warning: tracepoint '%s' is unused.\n", str); + } + } + + free(etrace->array); +} + +static void *tracepoint_check(struct elf_tracepoint *etrace) +{ + make_trace_array(etrace); + check_tracepoints(etrace); + + return NULL; +} + +static int process_tracepoints(void *addr, char const *const fname) +{ + struct elf_tracepoint etrace = {0}; + Elf_Ehdr *ehdr = addr; + Elf_Shdr *shdr_start; + Elf_Shdr *string_sec; + const char *secstrings; + unsigned int shnum; + unsigned int shstrndx; + int shentsize; + int idx; + int done = 2; + + shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr)); + shentsize = ehdr_shentsize(ehdr); + + shstrndx = ehdr_shstrndx(ehdr); + if (shstrndx == SHN_XINDEX) + shstrndx = shdr_link(shdr_start); + string_sec = get_index(shdr_start, shentsize, shstrndx); + secstrings = (const char *)ehdr + shdr_offset(string_sec); + + shnum = ehdr_shnum(ehdr); + if (shnum == SHN_UNDEF) + shnum = shdr_size(shdr_start); + + for (int i = 0; done && i < shnum; i++) { + Elf_Shdr *shdr = get_index(shdr_start, shentsize, i); + + idx = shdr_name(shdr); + + /* locate the __tracepoint_check in vmlinux */ + if (!strcmp(secstrings + idx, "__tracepoint_check")) { + check_data_sec = shdr; + done--; + } + + /* locate the __tracepoints_ptrs section in vmlinux */ + if (!strcmp(secstrings + idx, "__tracepoints_strings")) { + tracepoint_data_sec = shdr; + done--; + } + } + + if (!check_data_sec) { + fprintf(stderr, "no __tracepoint_check in file: %s\n", fname); + return -1; + } + + if (!tracepoint_data_sec) { + fprintf(stderr, "no __tracepoint_strings in file: %s\n", fname); + return -1; + } + + etrace.ehdr = ehdr; + tracepoint_check(&etrace); + return 0; +} + +int main(int argc, char *argv[]) +{ + int n_error = 0; + size_t size = 0; + void *addr = NULL; + + if (argc < 2) { + fprintf(stderr, "usage: tracepoint-update vmlinux...\n"); + return 0; + } + + /* Process each file in turn, allowing deep failure. */ + for (int i = 1; i < argc; i++) { + addr = elf_map(argv[i], &size, 1 << ET_REL); + if (!addr) { + ++n_error; + continue; + } + + if (process_tracepoints(addr, argv[i])) + ++n_error; + + elf_unmap(addr, size); + } + + return !!n_error; +} -- cgit v1.2.3 From eec3516b25069d8cb51b78375e337c69a3f9e789 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 21 Oct 2025 20:43:42 -0400 Subject: tracing: Allow tracepoint-update.c to work with modules In order for tracepoint-update.c to work with modules, it cannot error out if both "__tracepoint_check" and "__tracepoints_strings" are not found. When enabled, the vmlinux.o may be required to have both, but modules only have these sections if they have tracepoints. Modules without tracepoints will not have either. They should not fail to build because of that. If one section exists the other one should too. Note, if a module defines a tracepoint but doesn't use any, it can cause this to fail. Add a new "--module" parameter to tracepoint-update to be used when running on module code. It will not error out if this is set and both sections are missing. If this is set, and only the "__tracepoint_check" section is missing, it means the module has defined tracepoints but none of them are used. In that case, it prints a warning that the module has only unused tracepoints and exits normally to not fail the build. If the "__tracepoint_check" section exists but not the "__tracepoint_strings", then that is an error and should fail the build. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Nick Desaulniers Cc: Catalin Marinas Cc: Linus Torvalds Cc: Randy Dunlap Cc: Stephen Rothwell Link: https://lore.kernel.org/20251022004453.255696445@kernel.org Signed-off-by: Steven Rostedt (Google) --- scripts/tracepoint-update.c | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) (limited to 'scripts') diff --git a/scripts/tracepoint-update.c b/scripts/tracepoint-update.c index 6ec30f39d0ad..7f7d90df14ce 100644 --- a/scripts/tracepoint-update.c +++ b/scripts/tracepoint-update.c @@ -112,7 +112,7 @@ static int find_event(const char *str, void *array, size_t size) return bsearch(&str, array, size, sizeof(char *), compare_strings) != NULL; } -static void check_tracepoints(struct elf_tracepoint *etrace) +static void check_tracepoints(struct elf_tracepoint *etrace, const char *fname) { Elf_Ehdr *ehdr = etrace->ehdr; int len; @@ -129,22 +129,26 @@ static void check_tracepoints(struct elf_tracepoint *etrace) if (!len) continue; if (!find_event(str, etrace->array, etrace->count)) { - fprintf(stderr, "warning: tracepoint '%s' is unused.\n", str); + fprintf(stderr, "warning: tracepoint '%s' is unused", str); + if (fname) + fprintf(stderr, " in module %s\n", fname); + else + fprintf(stderr, "\n"); } } free(etrace->array); } -static void *tracepoint_check(struct elf_tracepoint *etrace) +static void *tracepoint_check(struct elf_tracepoint *etrace, const char *fname) { make_trace_array(etrace); - check_tracepoints(etrace); + check_tracepoints(etrace, fname); return NULL; } -static int process_tracepoints(void *addr, char const *const fname) +static int process_tracepoints(bool mod, void *addr, const char *fname) { struct elf_tracepoint etrace = {0}; Elf_Ehdr *ehdr = addr; @@ -188,7 +192,19 @@ static int process_tracepoints(void *addr, char const *const fname) } } + /* + * Modules may not have either section. But if it has one section, + * it should have both of them. + */ + if (mod && !check_data_sec && !tracepoint_data_sec) + return 0; + if (!check_data_sec) { + if (mod) { + fprintf(stderr, "warning: Module %s has only unused tracepoints\n", fname); + /* Do not fail build */ + return 0; + } fprintf(stderr, "no __tracepoint_check in file: %s\n", fname); return -1; } @@ -198,8 +214,11 @@ static int process_tracepoints(void *addr, char const *const fname) return -1; } + if (!mod) + fname = NULL; + etrace.ehdr = ehdr; - tracepoint_check(&etrace); + tracepoint_check(&etrace, fname); return 0; } @@ -208,9 +227,19 @@ int main(int argc, char *argv[]) int n_error = 0; size_t size = 0; void *addr = NULL; + bool mod = false; + + if (argc > 1 && strcmp(argv[1], "--module") == 0) { + mod = true; + argc--; + argv++; + } if (argc < 2) { - fprintf(stderr, "usage: tracepoint-update vmlinux...\n"); + if (mod) + fprintf(stderr, "usage: tracepoint-update --module module...\n"); + else + fprintf(stderr, "usage: tracepoint-update vmlinux...\n"); return 0; } @@ -222,7 +251,7 @@ int main(int argc, char *argv[]) continue; } - if (process_tracepoints(addr, argv[i])) + if (process_tracepoints(mod, addr, argv[i])) ++n_error; elf_unmap(addr, size); -- cgit v1.2.3 From 01ecf7af00b86daf7ac441b9f94d4873d2b8fc74 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 21 Oct 2025 20:43:43 -0400 Subject: tracing: Add warnings for unused tracepoints for modules If a modules has TRACE_EVENT() but does not use it, add a warning about it at build time. Currently, the build must be made by adding "UT=1" to the make command line in order for this to trigger. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Nick Desaulniers Cc: Catalin Marinas Cc: Linus Torvalds Cc: Randy Dunlap Cc: Stephen Rothwell Link: https://lore.kernel.org/20251022004453.422000794@kernel.org Signed-off-by: Steven Rostedt (Google) --- scripts/Makefile.modfinal | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'scripts') diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 542ba462ed3e..149e12ff5700 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -28,6 +28,10 @@ ccflags-remove-y := $(CC_FLAGS_CFI) .module-common.o: $(srctree)/scripts/module-common.c FORCE $(call if_changed_rule,cc_o_c) +ifneq ($(WARN_ON_UNUSED_TRACEPOINTS),) +cmd_check_tracepoint = $(objtree)/scripts/tracepoint-update --module $<; +endif + quiet_cmd_ld_ko_o = LD [M] $@ cmd_ld_ko_o = \ $(LD) -r $(KBUILD_LDFLAGS) \ @@ -57,6 +61,7 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \ ifdef CONFIG_DEBUG_INFO_BTF_MODULES +$(if $(newer-prereqs),$(call cmd,btf_ko)) endif + +$(call cmd,check_tracepoint) targets += $(modules:%.o=%.ko) $(modules:%.o=%.mod.o) .module-common.o -- cgit v1.2.3 From 909597fa01f28025d601090b12a028eac71af946 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 10:50:20 -0600 Subject: docs: Move the "features" tools to tools/docs The scripts for managing the features docs are found in three different directories; unite them all under tools/docs and update references as needed. Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- Documentation/features/list-arch.sh | 11 - Documentation/features/scripts/features-refresh.sh | 98 ---- Documentation/sphinx/kernel_feat.py | 4 +- scripts/get_feat.pl | 641 --------------------- tools/docs/features-refresh.sh | 98 ++++ tools/docs/get_feat.pl | 641 +++++++++++++++++++++ tools/docs/list-arch.sh | 11 + 7 files changed, 752 insertions(+), 752 deletions(-) delete mode 100755 Documentation/features/list-arch.sh delete mode 100755 Documentation/features/scripts/features-refresh.sh delete mode 100755 scripts/get_feat.pl create mode 100755 tools/docs/features-refresh.sh create mode 100755 tools/docs/get_feat.pl create mode 100755 tools/docs/list-arch.sh (limited to 'scripts') diff --git a/Documentation/features/list-arch.sh b/Documentation/features/list-arch.sh deleted file mode 100755 index ac8ff7f6f859..000000000000 --- a/Documentation/features/list-arch.sh +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Small script that visualizes the kernel feature support status -# of an architecture. -# -# (If no arguments are given then it will print the host architecture's status.) -# - -ARCH=${1:-$(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/')} - -$(dirname $0)/../../scripts/get_feat.pl list --arch $ARCH diff --git a/Documentation/features/scripts/features-refresh.sh b/Documentation/features/scripts/features-refresh.sh deleted file mode 100755 index c2288124e94a..000000000000 --- a/Documentation/features/scripts/features-refresh.sh +++ /dev/null @@ -1,98 +0,0 @@ -# -# Small script that refreshes the kernel feature support status in place. -# - -for F_FILE in Documentation/features/*/*/arch-support.txt; do - F=$(grep "^# Kconfig:" "$F_FILE" | cut -c26-) - - # - # Each feature F is identified by a pair (O, K), where 'O' can - # be either the empty string (for 'nop') or "not" (the logical - # negation operator '!'); other operators are not supported. - # - O="" - K=$F - if [[ "$F" == !* ]]; then - O="not" - K=$(echo $F | sed -e 's/^!//g') - fi - - # - # F := (O, K) is 'valid' iff there is a Kconfig file (for some - # arch) which contains K. - # - # Notice that this definition entails an 'asymmetry' between - # the case 'O = ""' and the case 'O = "not"'. E.g., F may be - # _invalid_ if: - # - # [case 'O = ""'] - # 1) no arch provides support for F, - # 2) K does not exist (e.g., it was renamed/mis-typed); - # - # [case 'O = "not"'] - # 3) all archs provide support for F, - # 4) as in (2). - # - # The rationale for adopting this definition (and, thus, for - # keeping the asymmetry) is: - # - # We want to be able to 'detect' (2) (or (4)). - # - # (1) and (3) may further warn the developers about the fact - # that K can be removed. - # - F_VALID="false" - for ARCH_DIR in arch/*/; do - K_FILES=$(find $ARCH_DIR -name "Kconfig*") - K_GREP=$(grep "$K" $K_FILES) - if [ ! -z "$K_GREP" ]; then - F_VALID="true" - break - fi - done - if [ "$F_VALID" = "false" ]; then - printf "WARNING: '%s' is not a valid Kconfig\n" "$F" - fi - - T_FILE="$F_FILE.tmp" - grep "^#" $F_FILE > $T_FILE - echo " -----------------------" >> $T_FILE - echo " | arch |status|" >> $T_FILE - echo " -----------------------" >> $T_FILE - for ARCH_DIR in arch/*/; do - ARCH=$(echo $ARCH_DIR | sed -e 's/^arch//g' | sed -e 's/\///g') - K_FILES=$(find $ARCH_DIR -name "Kconfig*") - K_GREP=$(grep "$K" $K_FILES) - # - # Arch support status values for (O, K) are updated according - # to the following rules. - # - # - ("", K) is 'supported by a given arch', if there is a - # Kconfig file for that arch which contains K; - # - # - ("not", K) is 'supported by a given arch', if there is - # no Kconfig file for that arch which contains K; - # - # - otherwise: preserve the previous status value (if any), - # default to 'not yet supported'. - # - # Notice that, according these rules, invalid features may be - # updated/modified. - # - if [ "$O" = "" ] && [ ! -z "$K_GREP" ]; then - printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE - elif [ "$O" = "not" ] && [ -z "$K_GREP" ]; then - printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE - else - S=$(grep -v "^#" "$F_FILE" | grep " $ARCH:") - if [ ! -z "$S" ]; then - echo "$S" >> $T_FILE - else - printf " |%12s: | TODO |\n" "$ARCH" \ - >> $T_FILE - fi - fi - done - echo " -----------------------" >> $T_FILE - mv $T_FILE $F_FILE -done diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py index aaac76892ceb..81c67ef23d8d 100644 --- a/Documentation/sphinx/kernel_feat.py +++ b/Documentation/sphinx/kernel_feat.py @@ -13,7 +13,7 @@ :license: GPL Version 2, June 1991 see Linux/COPYING for details. The ``kernel-feat`` (:py:class:`KernelFeat`) directive calls the - scripts/get_feat.pl script to parse the Kernel ABI files. + tools/docs/get_feat.pl script to parse the Kernel ABI files. Overview of directive's argument and options. @@ -85,7 +85,7 @@ class KernelFeat(Directive): srctree = os.path.abspath(os.environ["srctree"]) args = [ - os.path.join(srctree, 'scripts/get_feat.pl'), + os.path.join(srctree, 'tools/docs/get_feat.pl'), 'rest', '--enable-fname', '--dir', diff --git a/scripts/get_feat.pl b/scripts/get_feat.pl deleted file mode 100755 index 40fb28c8424e..000000000000 --- a/scripts/get_feat.pl +++ /dev/null @@ -1,641 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 - -use strict; -use Pod::Usage; -use Getopt::Long; -use File::Find; -use Fcntl ':mode'; -use Cwd 'abs_path'; - -my $help; -my $man; -my $debug; -my $arch; -my $feat; -my $enable_fname; - -my $basename = abs_path($0); -$basename =~ s,/[^/]+$,/,; - -my $prefix=$basename . "../Documentation/features"; - -# Used only at for full features output. The script will auto-adjust -# such values for the minimal possible values -my $status_size = 1; -my $description_size = 1; - -GetOptions( - "debug|d+" => \$debug, - "dir=s" => \$prefix, - 'help|?' => \$help, - 'arch=s' => \$arch, - 'feat=s' => \$feat, - 'feature=s' => \$feat, - "enable-fname" => \$enable_fname, - man => \$man -) or pod2usage(2); - -pod2usage(1) if $help; -pod2usage(-exitstatus => 0, -verbose => 2) if $man; - -pod2usage(1) if (scalar @ARGV < 1 || @ARGV > 2); - -my ($cmd, $arg) = @ARGV; - -pod2usage(2) if ($cmd ne "current" && $cmd ne "rest" && $cmd ne "validate" - && $cmd ne "ls" && $cmd ne "list"); - -require Data::Dumper if ($debug); - -my %data; -my %archs; - -# -# Displays an error message, printing file name and line -# -sub parse_error($$$$) { - my ($file, $ln, $msg, $data) = @_; - - $data =~ s/\s+$/\n/; - - print STDERR "Warning: file $file#$ln:\n\t$msg"; - - if ($data ne "") { - print STDERR ". Line\n\t\t$data"; - } else { - print STDERR "\n"; - } -} - -# -# Parse a features file, storing its contents at %data -# - -my $h_name = "Feature"; -my $h_kconfig = "Kconfig"; -my $h_description = "Description"; -my $h_subsys = "Subsystem"; -my $h_status = "Status"; -my $h_arch = "Architecture"; - -my $max_size_name = length($h_name); -my $max_size_kconfig = length($h_kconfig); -my $max_size_description = length($h_description); -my $max_size_subsys = length($h_subsys); -my $max_size_status = length($h_status); - -my $max_size_arch = 0; -my $max_size_arch_with_header; -my $max_description_word = 0; - -sub parse_feat { - my $file = $File::Find::name; - - my $mode = (stat($file))[2]; - return if ($mode & S_IFDIR); - return if ($file =~ m,($prefix)/arch-support.txt,); - return if (!($file =~ m,arch-support.txt$,)); - - if ($enable_fname) { - printf ".. FILE %s\n", abs_path($file); - } - - my $subsys = ""; - $subsys = $2 if ( m,.*($prefix)/([^/]+).*,); - - if (length($subsys) > $max_size_subsys) { - $max_size_subsys = length($subsys); - } - - my $name; - my $kconfig; - my $description; - my $comments = ""; - my $last_status; - my $ln; - my %arch_table; - - print STDERR "Opening $file\n" if ($debug > 1); - open IN, $file; - - while() { - $ln++; - - if (m/^\#\s+Feature\s+name:\s*(.*\S)/) { - $name = $1; - if (length($name) > $max_size_name) { - $max_size_name = length($name); - } - next; - } - if (m/^\#\s+Kconfig:\s*(.*\S)/) { - $kconfig = $1; - if (length($kconfig) > $max_size_kconfig) { - $max_size_kconfig = length($kconfig); - } - next; - } - if (m/^\#\s+description:\s*(.*\S)/) { - $description = $1; - if (length($description) > $max_size_description) { - $max_size_description = length($description); - } - - foreach my $word (split /\s+/, $description) { - if (length($word) > $max_description_word) { - $max_description_word = length($word); - } - } - - next; - } - next if (m/^\\s*$/); - next if (m/^\s*\-+\s*$/); - next if (m/^\s*\|\s*arch\s*\|\s*status\s*\|\s*$/); - - if (m/^\#\s*(.*)/) { - $comments .= "$1\n"; - next; - } - if (m/^\s*\|\s*(\S+):\s*\|\s*(\S+)\s*\|\s*$/) { - my $a = $1; - my $status = $2; - - if (length($status) > $max_size_status) { - $max_size_status = length($status); - } - if (length($a) > $max_size_arch) { - $max_size_arch = length($a); - } - - $status = "---" if ($status =~ m/^\.\.$/); - - $archs{$a} = 1; - $arch_table{$a} = $status; - next; - } - - #Everything else is an error - parse_error($file, $ln, "line is invalid", $_); - } - close IN; - - if (!$name) { - parse_error($file, $ln, "Feature name not found", ""); - return; - } - - parse_error($file, $ln, "Subsystem not found", "") if (!$subsys); - parse_error($file, $ln, "Kconfig not found", "") if (!$kconfig); - parse_error($file, $ln, "Description not found", "") if (!$description); - - if (!%arch_table) { - parse_error($file, $ln, "Architecture table not found", ""); - return; - } - - $data{$name}->{where} = $file; - $data{$name}->{subsys} = $subsys; - $data{$name}->{kconfig} = $kconfig; - $data{$name}->{description} = $description; - $data{$name}->{comments} = $comments; - $data{$name}->{table} = \%arch_table; - - $max_size_arch_with_header = $max_size_arch + length($h_arch); -} - -# -# Output feature(s) for a given architecture -# -sub output_arch_table { - my $title = "Feature status on $arch architecture"; - - print "=" x length($title) . "\n"; - print "$title\n"; - print "=" x length($title) . "\n\n"; - - print "=" x $max_size_subsys; - print " "; - print "=" x $max_size_name; - print " "; - print "=" x $max_size_kconfig; - print " "; - print "=" x $max_size_status; - print " "; - print "=" x $max_size_description; - print "\n"; - printf "%-${max_size_subsys}s ", $h_subsys; - printf "%-${max_size_name}s ", $h_name; - printf "%-${max_size_kconfig}s ", $h_kconfig; - printf "%-${max_size_status}s ", $h_status; - printf "%-${max_size_description}s\n", $h_description; - print "=" x $max_size_subsys; - print " "; - print "=" x $max_size_name; - print " "; - print "=" x $max_size_kconfig; - print " "; - print "=" x $max_size_status; - print " "; - print "=" x $max_size_description; - print "\n"; - - foreach my $name (sort { - ($data{$a}->{subsys} cmp $data{$b}->{subsys}) || - ("\L$a" cmp "\L$b") - } keys %data) { - next if ($feat && $name ne $feat); - - my %arch_table = %{$data{$name}->{table}}; - printf "%-${max_size_subsys}s ", $data{$name}->{subsys}; - printf "%-${max_size_name}s ", $name; - printf "%-${max_size_kconfig}s ", $data{$name}->{kconfig}; - printf "%-${max_size_status}s ", $arch_table{$arch}; - printf "%-s\n", $data{$name}->{description}; - } - - print "=" x $max_size_subsys; - print " "; - print "=" x $max_size_name; - print " "; - print "=" x $max_size_kconfig; - print " "; - print "=" x $max_size_status; - print " "; - print "=" x $max_size_description; - print "\n"; -} - -# -# list feature(s) for a given architecture -# -sub list_arch_features { - print "#\n# Kernel feature support matrix of the '$arch' architecture:\n#\n"; - - foreach my $name (sort { - ($data{$a}->{subsys} cmp $data{$b}->{subsys}) || - ("\L$a" cmp "\L$b") - } keys %data) { - next if ($feat && $name ne $feat); - - my %arch_table = %{$data{$name}->{table}}; - - my $status = $arch_table{$arch}; - $status = " " x ((4 - length($status)) / 2) . $status; - - printf " %${max_size_subsys}s/ ", $data{$name}->{subsys}; - printf "%-${max_size_name}s: ", $name; - printf "%-5s| ", $status; - printf "%${max_size_kconfig}s # ", $data{$name}->{kconfig}; - printf " %s\n", $data{$name}->{description}; - } -} - -# -# Output a feature on all architectures -# -sub output_feature { - my $title = "Feature $feat"; - - print "=" x length($title) . "\n"; - print "$title\n"; - print "=" x length($title) . "\n\n"; - - print ":Subsystem: $data{$feat}->{subsys} \n" if ($data{$feat}->{subsys}); - print ":Kconfig: $data{$feat}->{kconfig} \n" if ($data{$feat}->{kconfig}); - - my $desc = $data{$feat}->{description}; - $desc =~ s/^([a-z])/\U$1/; - $desc =~ s/\.?\s*//; - print "\n$desc.\n\n"; - - my $com = $data{$feat}->{comments}; - $com =~ s/^\s+//; - $com =~ s/\s+$//; - if ($com) { - print "Comments\n"; - print "--------\n\n"; - print "$com\n\n"; - } - - print "=" x $max_size_arch_with_header; - print " "; - print "=" x $max_size_status; - print "\n"; - - printf "%-${max_size_arch}s ", $h_arch; - printf "%-${max_size_status}s", $h_status . "\n"; - - print "=" x $max_size_arch_with_header; - print " "; - print "=" x $max_size_status; - print "\n"; - - my %arch_table = %{$data{$feat}->{table}}; - foreach my $arch (sort keys %arch_table) { - printf "%-${max_size_arch}s ", $arch; - printf "%-${max_size_status}s\n", $arch_table{$arch}; - } - - print "=" x $max_size_arch_with_header; - print " "; - print "=" x $max_size_status; - print "\n"; -} - -# -# Output all features for all architectures -# - -sub matrix_lines($$$) { - my $desc_size = shift; - my $status_size = shift; - my $header = shift; - my $fill; - my $ln_marker; - - if ($header) { - $ln_marker = "="; - } else { - $ln_marker = "-"; - } - - $fill = $ln_marker; - - print "+"; - print $fill x $max_size_name; - print "+"; - print $fill x $desc_size; - print "+"; - print $ln_marker x $status_size; - print "+\n"; -} - -sub output_matrix { - my $title = "Feature status on all architectures"; - my $notcompat = "Not compatible"; - - print "=" x length($title) . "\n"; - print "$title\n"; - print "=" x length($title) . "\n\n"; - - my $desc_title = "$h_kconfig / $h_description"; - - my $desc_size = $max_size_kconfig + 4; - if (!$description_size) { - $desc_size = $max_size_description if ($max_size_description > $desc_size); - } else { - $desc_size = $description_size if ($description_size > $desc_size); - } - $desc_size = $max_description_word if ($max_description_word > $desc_size); - - $desc_size = length($desc_title) if (length($desc_title) > $desc_size); - - $max_size_status = length($notcompat) if (length($notcompat) > $max_size_status); - - # Ensure that the status will fit - my $min_status_size = $max_size_status + $max_size_arch + 6; - $status_size = $min_status_size if ($status_size < $min_status_size); - - - my $cur_subsys = ""; - foreach my $name (sort { - ($data{$a}->{subsys} cmp $data{$b}->{subsys}) or - ("\L$a" cmp "\L$b") - } keys %data) { - - if ($cur_subsys ne $data{$name}->{subsys}) { - if ($cur_subsys ne "") { - printf "\n"; - } - - $cur_subsys = $data{$name}->{subsys}; - - my $title = "Subsystem: $cur_subsys"; - print "$title\n"; - print "=" x length($title) . "\n\n"; - - - matrix_lines($desc_size, $status_size, 0); - - printf "|%-${max_size_name}s", $h_name; - printf "|%-${desc_size}s", $desc_title; - - printf "|%-${status_size}s|\n", "Status per architecture"; - matrix_lines($desc_size, $status_size, 1); - } - - my %arch_table = %{$data{$name}->{table}}; - my $cur_status = ""; - - my (@lines, @descs); - my $line = ""; - foreach my $arch (sort { - ($arch_table{$b} cmp $arch_table{$a}) or - ("\L$a" cmp "\L$b") - } keys %arch_table) { - - my $status = $arch_table{$arch}; - - if ($status eq "---") { - $status = $notcompat; - } - - if ($status ne $cur_status) { - if ($line ne "") { - push @lines, $line; - $line = ""; - } - $line = "- **" . $status . "**: " . $arch; - } elsif (length($line) + length ($arch) + 2 < $status_size) { - $line .= ", " . $arch; - } else { - push @lines, $line; - $line = " " . $arch; - } - $cur_status = $status; - } - push @lines, $line if ($line ne ""); - - my $description = $data{$name}->{description}; - while (length($description) > $desc_size) { - my $d = substr $description, 0, $desc_size; - - # Ensure that it will end on a space - # if it can't, it means that the size is too small - # Instead of aborting it, let's print what we have - if (!($d =~ s/^(.*)\s+.*/$1/)) { - $d = substr $d, 0, -1; - push @descs, "$d\\"; - $description =~ s/^\Q$d\E//; - } else { - push @descs, $d; - $description =~ s/^\Q$d\E\s+//; - } - } - push @descs, $description; - - # Ensure that the full description will be printed - push @lines, "" while (scalar(@lines) < 2 + scalar(@descs)); - - my $ln = 0; - for my $line(@lines) { - if (!$ln) { - printf "|%-${max_size_name}s", $name; - printf "|%-${desc_size}s", "``" . $data{$name}->{kconfig} . "``"; - } elsif ($ln >= 2 && scalar(@descs)) { - printf "|%-${max_size_name}s", ""; - printf "|%-${desc_size}s", shift @descs; - } else { - printf "|%-${max_size_name}s", ""; - printf "|%-${desc_size}s", ""; - } - - printf "|%-${status_size}s|\n", $line; - - $ln++; - } - matrix_lines($desc_size, $status_size, 0); - } -} - - -# -# Parses all feature files located at $prefix dir -# -find({wanted =>\&parse_feat, no_chdir => 1}, $prefix); - -print STDERR Data::Dumper->Dump([\%data], [qw(*data)]) if ($debug); - -# -# Handles the command -# -if ($cmd eq "current") { - $arch = qx(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/'); - $arch =~s/\s+$//; -} - -if ($cmd eq "ls" or $cmd eq "list") { - if (!$arch) { - $arch = qx(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/'); - $arch =~s/\s+$//; - } - - list_arch_features; - - exit; -} - -if ($cmd ne "validate") { - if ($arch) { - output_arch_table; - } elsif ($feat) { - output_feature; - } else { - output_matrix; - } -} - -__END__ - -=head1 NAME - -get_feat.pl - parse the Linux Feature files and produce a ReST book. - -=head1 SYNOPSIS - -B [--debug] [--man] [--help] [--dir=] [--arch=] - [--feature=|--feat=] [] - -Where can be: - -=over 8 - -B - output table in ReST compatible ASCII format - with features for this machine's architecture - -B - output table(s) in ReST compatible ASCII format - with features in ReST markup language. The output - is affected by --arch or --feat/--feature flags. - -B - validate the contents of the files under - Documentation/features. - -B or B - list features for this machine's architecture, - using an easier to parse format. - The output is affected by --arch flag. - -=back - -=head1 OPTIONS - -=over 8 - -=item B<--arch> - -Output features for an specific architecture, optionally filtering for -a single specific feature. - -=item B<--feat> or B<--feature> - -Output features for a single specific feature. - -=item B<--dir> - -Changes the location of the Feature files. By default, it uses -the Documentation/features directory. - -=item B<--enable-fname> - -Prints the file name of the feature files. This can be used in order to -track dependencies during documentation build. - -=item B<--debug> - -Put the script in verbose mode, useful for debugging. Can be called multiple -times, to increase verbosity. - -=item B<--help> - -Prints a brief help message and exits. - -=item B<--man> - -Prints the manual page and exits. - -=back - -=head1 DESCRIPTION - -Parse the Linux feature files from Documentation/features (by default), -optionally producing results at ReST format. - -It supports output data per architecture, per feature or a -feature x arch matrix. - -When used with B command, it will use either one of the tree formats: - -If neither B<--arch> or B<--feature> arguments are used, it will output a -matrix with features per architecture. - -If B<--arch> argument is used, it will output the features availability for -a given architecture. - -If B<--feat> argument is used, it will output the content of the feature -file using ReStructured Text markup. - -=head1 BUGS - -Report bugs to Mauro Carvalho Chehab - -=head1 COPYRIGHT - -Copyright (c) 2019 by Mauro Carvalho Chehab . - -License GPLv2: GNU GPL version 2 . - -This is free software: you are free to change and redistribute it. -There is NO WARRANTY, to the extent permitted by law. - -=cut diff --git a/tools/docs/features-refresh.sh b/tools/docs/features-refresh.sh new file mode 100755 index 000000000000..c2288124e94a --- /dev/null +++ b/tools/docs/features-refresh.sh @@ -0,0 +1,98 @@ +# +# Small script that refreshes the kernel feature support status in place. +# + +for F_FILE in Documentation/features/*/*/arch-support.txt; do + F=$(grep "^# Kconfig:" "$F_FILE" | cut -c26-) + + # + # Each feature F is identified by a pair (O, K), where 'O' can + # be either the empty string (for 'nop') or "not" (the logical + # negation operator '!'); other operators are not supported. + # + O="" + K=$F + if [[ "$F" == !* ]]; then + O="not" + K=$(echo $F | sed -e 's/^!//g') + fi + + # + # F := (O, K) is 'valid' iff there is a Kconfig file (for some + # arch) which contains K. + # + # Notice that this definition entails an 'asymmetry' between + # the case 'O = ""' and the case 'O = "not"'. E.g., F may be + # _invalid_ if: + # + # [case 'O = ""'] + # 1) no arch provides support for F, + # 2) K does not exist (e.g., it was renamed/mis-typed); + # + # [case 'O = "not"'] + # 3) all archs provide support for F, + # 4) as in (2). + # + # The rationale for adopting this definition (and, thus, for + # keeping the asymmetry) is: + # + # We want to be able to 'detect' (2) (or (4)). + # + # (1) and (3) may further warn the developers about the fact + # that K can be removed. + # + F_VALID="false" + for ARCH_DIR in arch/*/; do + K_FILES=$(find $ARCH_DIR -name "Kconfig*") + K_GREP=$(grep "$K" $K_FILES) + if [ ! -z "$K_GREP" ]; then + F_VALID="true" + break + fi + done + if [ "$F_VALID" = "false" ]; then + printf "WARNING: '%s' is not a valid Kconfig\n" "$F" + fi + + T_FILE="$F_FILE.tmp" + grep "^#" $F_FILE > $T_FILE + echo " -----------------------" >> $T_FILE + echo " | arch |status|" >> $T_FILE + echo " -----------------------" >> $T_FILE + for ARCH_DIR in arch/*/; do + ARCH=$(echo $ARCH_DIR | sed -e 's/^arch//g' | sed -e 's/\///g') + K_FILES=$(find $ARCH_DIR -name "Kconfig*") + K_GREP=$(grep "$K" $K_FILES) + # + # Arch support status values for (O, K) are updated according + # to the following rules. + # + # - ("", K) is 'supported by a given arch', if there is a + # Kconfig file for that arch which contains K; + # + # - ("not", K) is 'supported by a given arch', if there is + # no Kconfig file for that arch which contains K; + # + # - otherwise: preserve the previous status value (if any), + # default to 'not yet supported'. + # + # Notice that, according these rules, invalid features may be + # updated/modified. + # + if [ "$O" = "" ] && [ ! -z "$K_GREP" ]; then + printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE + elif [ "$O" = "not" ] && [ -z "$K_GREP" ]; then + printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE + else + S=$(grep -v "^#" "$F_FILE" | grep " $ARCH:") + if [ ! -z "$S" ]; then + echo "$S" >> $T_FILE + else + printf " |%12s: | TODO |\n" "$ARCH" \ + >> $T_FILE + fi + fi + done + echo " -----------------------" >> $T_FILE + mv $T_FILE $F_FILE +done diff --git a/tools/docs/get_feat.pl b/tools/docs/get_feat.pl new file mode 100755 index 000000000000..d75e7c85dc85 --- /dev/null +++ b/tools/docs/get_feat.pl @@ -0,0 +1,641 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 + +use strict; +use Pod::Usage; +use Getopt::Long; +use File::Find; +use Fcntl ':mode'; +use Cwd 'abs_path'; + +my $help; +my $man; +my $debug; +my $arch; +my $feat; +my $enable_fname; + +my $basename = abs_path($0); +$basename =~ s,/[^/]+$,/,; + +my $prefix=$basename . "../../Documentation/features"; + +# Used only at for full features output. The script will auto-adjust +# such values for the minimal possible values +my $status_size = 1; +my $description_size = 1; + +GetOptions( + "debug|d+" => \$debug, + "dir=s" => \$prefix, + 'help|?' => \$help, + 'arch=s' => \$arch, + 'feat=s' => \$feat, + 'feature=s' => \$feat, + "enable-fname" => \$enable_fname, + man => \$man +) or pod2usage(2); + +pod2usage(1) if $help; +pod2usage(-exitstatus => 0, -verbose => 2) if $man; + +pod2usage(1) if (scalar @ARGV < 1 || @ARGV > 2); + +my ($cmd, $arg) = @ARGV; + +pod2usage(2) if ($cmd ne "current" && $cmd ne "rest" && $cmd ne "validate" + && $cmd ne "ls" && $cmd ne "list"); + +require Data::Dumper if ($debug); + +my %data; +my %archs; + +# +# Displays an error message, printing file name and line +# +sub parse_error($$$$) { + my ($file, $ln, $msg, $data) = @_; + + $data =~ s/\s+$/\n/; + + print STDERR "Warning: file $file#$ln:\n\t$msg"; + + if ($data ne "") { + print STDERR ". Line\n\t\t$data"; + } else { + print STDERR "\n"; + } +} + +# +# Parse a features file, storing its contents at %data +# + +my $h_name = "Feature"; +my $h_kconfig = "Kconfig"; +my $h_description = "Description"; +my $h_subsys = "Subsystem"; +my $h_status = "Status"; +my $h_arch = "Architecture"; + +my $max_size_name = length($h_name); +my $max_size_kconfig = length($h_kconfig); +my $max_size_description = length($h_description); +my $max_size_subsys = length($h_subsys); +my $max_size_status = length($h_status); + +my $max_size_arch = 0; +my $max_size_arch_with_header; +my $max_description_word = 0; + +sub parse_feat { + my $file = $File::Find::name; + + my $mode = (stat($file))[2]; + return if ($mode & S_IFDIR); + return if ($file =~ m,($prefix)/arch-support.txt,); + return if (!($file =~ m,arch-support.txt$,)); + + if ($enable_fname) { + printf ".. FILE %s\n", abs_path($file); + } + + my $subsys = ""; + $subsys = $2 if ( m,.*($prefix)/([^/]+).*,); + + if (length($subsys) > $max_size_subsys) { + $max_size_subsys = length($subsys); + } + + my $name; + my $kconfig; + my $description; + my $comments = ""; + my $last_status; + my $ln; + my %arch_table; + + print STDERR "Opening $file\n" if ($debug > 1); + open IN, $file; + + while() { + $ln++; + + if (m/^\#\s+Feature\s+name:\s*(.*\S)/) { + $name = $1; + if (length($name) > $max_size_name) { + $max_size_name = length($name); + } + next; + } + if (m/^\#\s+Kconfig:\s*(.*\S)/) { + $kconfig = $1; + if (length($kconfig) > $max_size_kconfig) { + $max_size_kconfig = length($kconfig); + } + next; + } + if (m/^\#\s+description:\s*(.*\S)/) { + $description = $1; + if (length($description) > $max_size_description) { + $max_size_description = length($description); + } + + foreach my $word (split /\s+/, $description) { + if (length($word) > $max_description_word) { + $max_description_word = length($word); + } + } + + next; + } + next if (m/^\\s*$/); + next if (m/^\s*\-+\s*$/); + next if (m/^\s*\|\s*arch\s*\|\s*status\s*\|\s*$/); + + if (m/^\#\s*(.*)/) { + $comments .= "$1\n"; + next; + } + if (m/^\s*\|\s*(\S+):\s*\|\s*(\S+)\s*\|\s*$/) { + my $a = $1; + my $status = $2; + + if (length($status) > $max_size_status) { + $max_size_status = length($status); + } + if (length($a) > $max_size_arch) { + $max_size_arch = length($a); + } + + $status = "---" if ($status =~ m/^\.\.$/); + + $archs{$a} = 1; + $arch_table{$a} = $status; + next; + } + + #Everything else is an error + parse_error($file, $ln, "line is invalid", $_); + } + close IN; + + if (!$name) { + parse_error($file, $ln, "Feature name not found", ""); + return; + } + + parse_error($file, $ln, "Subsystem not found", "") if (!$subsys); + parse_error($file, $ln, "Kconfig not found", "") if (!$kconfig); + parse_error($file, $ln, "Description not found", "") if (!$description); + + if (!%arch_table) { + parse_error($file, $ln, "Architecture table not found", ""); + return; + } + + $data{$name}->{where} = $file; + $data{$name}->{subsys} = $subsys; + $data{$name}->{kconfig} = $kconfig; + $data{$name}->{description} = $description; + $data{$name}->{comments} = $comments; + $data{$name}->{table} = \%arch_table; + + $max_size_arch_with_header = $max_size_arch + length($h_arch); +} + +# +# Output feature(s) for a given architecture +# +sub output_arch_table { + my $title = "Feature status on $arch architecture"; + + print "=" x length($title) . "\n"; + print "$title\n"; + print "=" x length($title) . "\n\n"; + + print "=" x $max_size_subsys; + print " "; + print "=" x $max_size_name; + print " "; + print "=" x $max_size_kconfig; + print " "; + print "=" x $max_size_status; + print " "; + print "=" x $max_size_description; + print "\n"; + printf "%-${max_size_subsys}s ", $h_subsys; + printf "%-${max_size_name}s ", $h_name; + printf "%-${max_size_kconfig}s ", $h_kconfig; + printf "%-${max_size_status}s ", $h_status; + printf "%-${max_size_description}s\n", $h_description; + print "=" x $max_size_subsys; + print " "; + print "=" x $max_size_name; + print " "; + print "=" x $max_size_kconfig; + print " "; + print "=" x $max_size_status; + print " "; + print "=" x $max_size_description; + print "\n"; + + foreach my $name (sort { + ($data{$a}->{subsys} cmp $data{$b}->{subsys}) || + ("\L$a" cmp "\L$b") + } keys %data) { + next if ($feat && $name ne $feat); + + my %arch_table = %{$data{$name}->{table}}; + printf "%-${max_size_subsys}s ", $data{$name}->{subsys}; + printf "%-${max_size_name}s ", $name; + printf "%-${max_size_kconfig}s ", $data{$name}->{kconfig}; + printf "%-${max_size_status}s ", $arch_table{$arch}; + printf "%-s\n", $data{$name}->{description}; + } + + print "=" x $max_size_subsys; + print " "; + print "=" x $max_size_name; + print " "; + print "=" x $max_size_kconfig; + print " "; + print "=" x $max_size_status; + print " "; + print "=" x $max_size_description; + print "\n"; +} + +# +# list feature(s) for a given architecture +# +sub list_arch_features { + print "#\n# Kernel feature support matrix of the '$arch' architecture:\n#\n"; + + foreach my $name (sort { + ($data{$a}->{subsys} cmp $data{$b}->{subsys}) || + ("\L$a" cmp "\L$b") + } keys %data) { + next if ($feat && $name ne $feat); + + my %arch_table = %{$data{$name}->{table}}; + + my $status = $arch_table{$arch}; + $status = " " x ((4 - length($status)) / 2) . $status; + + printf " %${max_size_subsys}s/ ", $data{$name}->{subsys}; + printf "%-${max_size_name}s: ", $name; + printf "%-5s| ", $status; + printf "%${max_size_kconfig}s # ", $data{$name}->{kconfig}; + printf " %s\n", $data{$name}->{description}; + } +} + +# +# Output a feature on all architectures +# +sub output_feature { + my $title = "Feature $feat"; + + print "=" x length($title) . "\n"; + print "$title\n"; + print "=" x length($title) . "\n\n"; + + print ":Subsystem: $data{$feat}->{subsys} \n" if ($data{$feat}->{subsys}); + print ":Kconfig: $data{$feat}->{kconfig} \n" if ($data{$feat}->{kconfig}); + + my $desc = $data{$feat}->{description}; + $desc =~ s/^([a-z])/\U$1/; + $desc =~ s/\.?\s*//; + print "\n$desc.\n\n"; + + my $com = $data{$feat}->{comments}; + $com =~ s/^\s+//; + $com =~ s/\s+$//; + if ($com) { + print "Comments\n"; + print "--------\n\n"; + print "$com\n\n"; + } + + print "=" x $max_size_arch_with_header; + print " "; + print "=" x $max_size_status; + print "\n"; + + printf "%-${max_size_arch}s ", $h_arch; + printf "%-${max_size_status}s", $h_status . "\n"; + + print "=" x $max_size_arch_with_header; + print " "; + print "=" x $max_size_status; + print "\n"; + + my %arch_table = %{$data{$feat}->{table}}; + foreach my $arch (sort keys %arch_table) { + printf "%-${max_size_arch}s ", $arch; + printf "%-${max_size_status}s\n", $arch_table{$arch}; + } + + print "=" x $max_size_arch_with_header; + print " "; + print "=" x $max_size_status; + print "\n"; +} + +# +# Output all features for all architectures +# + +sub matrix_lines($$$) { + my $desc_size = shift; + my $status_size = shift; + my $header = shift; + my $fill; + my $ln_marker; + + if ($header) { + $ln_marker = "="; + } else { + $ln_marker = "-"; + } + + $fill = $ln_marker; + + print "+"; + print $fill x $max_size_name; + print "+"; + print $fill x $desc_size; + print "+"; + print $ln_marker x $status_size; + print "+\n"; +} + +sub output_matrix { + my $title = "Feature status on all architectures"; + my $notcompat = "Not compatible"; + + print "=" x length($title) . "\n"; + print "$title\n"; + print "=" x length($title) . "\n\n"; + + my $desc_title = "$h_kconfig / $h_description"; + + my $desc_size = $max_size_kconfig + 4; + if (!$description_size) { + $desc_size = $max_size_description if ($max_size_description > $desc_size); + } else { + $desc_size = $description_size if ($description_size > $desc_size); + } + $desc_size = $max_description_word if ($max_description_word > $desc_size); + + $desc_size = length($desc_title) if (length($desc_title) > $desc_size); + + $max_size_status = length($notcompat) if (length($notcompat) > $max_size_status); + + # Ensure that the status will fit + my $min_status_size = $max_size_status + $max_size_arch + 6; + $status_size = $min_status_size if ($status_size < $min_status_size); + + + my $cur_subsys = ""; + foreach my $name (sort { + ($data{$a}->{subsys} cmp $data{$b}->{subsys}) or + ("\L$a" cmp "\L$b") + } keys %data) { + + if ($cur_subsys ne $data{$name}->{subsys}) { + if ($cur_subsys ne "") { + printf "\n"; + } + + $cur_subsys = $data{$name}->{subsys}; + + my $title = "Subsystem: $cur_subsys"; + print "$title\n"; + print "=" x length($title) . "\n\n"; + + + matrix_lines($desc_size, $status_size, 0); + + printf "|%-${max_size_name}s", $h_name; + printf "|%-${desc_size}s", $desc_title; + + printf "|%-${status_size}s|\n", "Status per architecture"; + matrix_lines($desc_size, $status_size, 1); + } + + my %arch_table = %{$data{$name}->{table}}; + my $cur_status = ""; + + my (@lines, @descs); + my $line = ""; + foreach my $arch (sort { + ($arch_table{$b} cmp $arch_table{$a}) or + ("\L$a" cmp "\L$b") + } keys %arch_table) { + + my $status = $arch_table{$arch}; + + if ($status eq "---") { + $status = $notcompat; + } + + if ($status ne $cur_status) { + if ($line ne "") { + push @lines, $line; + $line = ""; + } + $line = "- **" . $status . "**: " . $arch; + } elsif (length($line) + length ($arch) + 2 < $status_size) { + $line .= ", " . $arch; + } else { + push @lines, $line; + $line = " " . $arch; + } + $cur_status = $status; + } + push @lines, $line if ($line ne ""); + + my $description = $data{$name}->{description}; + while (length($description) > $desc_size) { + my $d = substr $description, 0, $desc_size; + + # Ensure that it will end on a space + # if it can't, it means that the size is too small + # Instead of aborting it, let's print what we have + if (!($d =~ s/^(.*)\s+.*/$1/)) { + $d = substr $d, 0, -1; + push @descs, "$d\\"; + $description =~ s/^\Q$d\E//; + } else { + push @descs, $d; + $description =~ s/^\Q$d\E\s+//; + } + } + push @descs, $description; + + # Ensure that the full description will be printed + push @lines, "" while (scalar(@lines) < 2 + scalar(@descs)); + + my $ln = 0; + for my $line(@lines) { + if (!$ln) { + printf "|%-${max_size_name}s", $name; + printf "|%-${desc_size}s", "``" . $data{$name}->{kconfig} . "``"; + } elsif ($ln >= 2 && scalar(@descs)) { + printf "|%-${max_size_name}s", ""; + printf "|%-${desc_size}s", shift @descs; + } else { + printf "|%-${max_size_name}s", ""; + printf "|%-${desc_size}s", ""; + } + + printf "|%-${status_size}s|\n", $line; + + $ln++; + } + matrix_lines($desc_size, $status_size, 0); + } +} + + +# +# Parses all feature files located at $prefix dir +# +find({wanted =>\&parse_feat, no_chdir => 1}, $prefix); + +print STDERR Data::Dumper->Dump([\%data], [qw(*data)]) if ($debug); + +# +# Handles the command +# +if ($cmd eq "current") { + $arch = qx(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/'); + $arch =~s/\s+$//; +} + +if ($cmd eq "ls" or $cmd eq "list") { + if (!$arch) { + $arch = qx(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/'); + $arch =~s/\s+$//; + } + + list_arch_features; + + exit; +} + +if ($cmd ne "validate") { + if ($arch) { + output_arch_table; + } elsif ($feat) { + output_feature; + } else { + output_matrix; + } +} + +__END__ + +=head1 NAME + +get_feat.pl - parse the Linux Feature files and produce a ReST book. + +=head1 SYNOPSIS + +B [--debug] [--man] [--help] [--dir=] [--arch=] + [--feature=|--feat=] [] + +Where can be: + +=over 8 + +B - output table in ReST compatible ASCII format + with features for this machine's architecture + +B - output table(s) in ReST compatible ASCII format + with features in ReST markup language. The output + is affected by --arch or --feat/--feature flags. + +B - validate the contents of the files under + Documentation/features. + +B or B - list features for this machine's architecture, + using an easier to parse format. + The output is affected by --arch flag. + +=back + +=head1 OPTIONS + +=over 8 + +=item B<--arch> + +Output features for an specific architecture, optionally filtering for +a single specific feature. + +=item B<--feat> or B<--feature> + +Output features for a single specific feature. + +=item B<--dir> + +Changes the location of the Feature files. By default, it uses +the Documentation/features directory. + +=item B<--enable-fname> + +Prints the file name of the feature files. This can be used in order to +track dependencies during documentation build. + +=item B<--debug> + +Put the script in verbose mode, useful for debugging. Can be called multiple +times, to increase verbosity. + +=item B<--help> + +Prints a brief help message and exits. + +=item B<--man> + +Prints the manual page and exits. + +=back + +=head1 DESCRIPTION + +Parse the Linux feature files from Documentation/features (by default), +optionally producing results at ReST format. + +It supports output data per architecture, per feature or a +feature x arch matrix. + +When used with B command, it will use either one of the tree formats: + +If neither B<--arch> or B<--feature> arguments are used, it will output a +matrix with features per architecture. + +If B<--arch> argument is used, it will output the features availability for +a given architecture. + +If B<--feat> argument is used, it will output the content of the feature +file using ReStructured Text markup. + +=head1 BUGS + +Report bugs to Mauro Carvalho Chehab + +=head1 COPYRIGHT + +Copyright (c) 2019 by Mauro Carvalho Chehab . + +License GPLv2: GNU GPL version 2 . + +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. + +=cut diff --git a/tools/docs/list-arch.sh b/tools/docs/list-arch.sh new file mode 100755 index 000000000000..96fe83b7058b --- /dev/null +++ b/tools/docs/list-arch.sh @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Small script that visualizes the kernel feature support status +# of an architecture. +# +# (If no arguments are given then it will print the host architecture's status.) +# + +ARCH=${1:-$(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/')} + +$(dirname $0)/get_feat.pl list --arch $ARCH -- cgit v1.2.3 From d37366cac4ccfb71c77e9620f63e3a6fcdf3816c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 11:03:02 -0600 Subject: docs: move checktransupdate.py to tools/docs The checktranslate.py tool currently languishes in scripts/; move it to tools/docs and update references accordingly. Cc: Alex Shi Cc: Yanteng Si Cc: Dongliang Mu Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- Documentation/doc-guide/checktransupdate.rst | 6 +- .../zh_CN/doc-guide/checktransupdate.rst | 6 +- Documentation/translations/zh_CN/how-to.rst | 2 +- MAINTAINERS | 1 - scripts/checktransupdate.py | 307 --------------------- tools/docs/checktransupdate.py | 307 +++++++++++++++++++++ 6 files changed, 314 insertions(+), 315 deletions(-) delete mode 100755 scripts/checktransupdate.py create mode 100755 tools/docs/checktransupdate.py (limited to 'scripts') diff --git a/Documentation/doc-guide/checktransupdate.rst b/Documentation/doc-guide/checktransupdate.rst index dfaf9d373747..7b25375cc6d9 100644 --- a/Documentation/doc-guide/checktransupdate.rst +++ b/Documentation/doc-guide/checktransupdate.rst @@ -27,15 +27,15 @@ Usage :: - ./scripts/checktransupdate.py --help + tools/docs/checktransupdate.py --help Please refer to the output of argument parser for usage details. Samples -- ``./scripts/checktransupdate.py -l zh_CN`` +- ``tools/docs/checktransupdate.py -l zh_CN`` This will print all the files that need to be updated in the zh_CN locale. -- ``./scripts/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst`` +- ``tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst`` This will only print the status of the specified file. Then the output is something like: diff --git a/Documentation/translations/zh_CN/doc-guide/checktransupdate.rst b/Documentation/translations/zh_CN/doc-guide/checktransupdate.rst index d20b4ce66b9f..dbfd65398077 100644 --- a/Documentation/translations/zh_CN/doc-guide/checktransupdate.rst +++ b/Documentation/translations/zh_CN/doc-guide/checktransupdate.rst @@ -28,15 +28,15 @@ :: - ./scripts/checktransupdate.py --help + tools/docs/checktransupdate.py --help 具体用法请参考参数解析器的输出 示例 -- ``./scripts/checktransupdate.py -l zh_CN`` +- ``tools/docs/checktransupdate.py -l zh_CN`` 这将打印 zh_CN 语言中需要更新的所有文件。 -- ``./scripts/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst`` +- ``tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst`` 这将只打印指定文件的状态。 然后输出类似如下的内容: diff --git a/Documentation/translations/zh_CN/how-to.rst b/Documentation/translations/zh_CN/how-to.rst index 714664fec308..7ae5d8765888 100644 --- a/Documentation/translations/zh_CN/how-to.rst +++ b/Documentation/translations/zh_CN/how-to.rst @@ -437,7 +437,7 @@ git email 默认会抄送给您一份,所以您可以切换为审阅者的角 对于首次参与 Linux 内核中文文档翻译的新手,建议您在 linux 目录中运行以下命令: :: - ./script/checktransupdate.py -l zh_CN`` + tools/docs/checktransupdate.py -l zh_CN`` 该命令会列出需要翻译或更新的英文文档,结果同时保存在 checktransupdate.log 中。 diff --git a/MAINTAINERS b/MAINTAINERS index 5aa6d769b254..cd187b9f1dc2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7411,7 +7411,6 @@ S: Maintained P: Documentation/doc-guide/maintainer-profile.rst T: git git://git.lwn.net/linux.git docs-next F: Documentation/ -F: scripts/checktransupdate.py F: scripts/documentation-file-ref-check F: scripts/get_abi.py F: scripts/kernel-doc* diff --git a/scripts/checktransupdate.py b/scripts/checktransupdate.py deleted file mode 100755 index e39529e46c3d..000000000000 --- a/scripts/checktransupdate.py +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -""" -This script helps track the translation status of the documentation -in different locales, e.g., zh_CN. More specially, it uses `git log` -commit to find the latest english commit from the translation commit -(order by author date) and the latest english commits from HEAD. If -differences occur, report the file and commits that need to be updated. - -The usage is as follows: -- ./scripts/checktransupdate.py -l zh_CN -This will print all the files that need to be updated or translated in the zh_CN locale. -- ./scripts/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst -This will only print the status of the specified file. - -The output is something like: -Documentation/dev-tools/kfence.rst -No translation in the locale of zh_CN - -Documentation/translations/zh_CN/dev-tools/testing-overview.rst -commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs") -1 commits needs resolving in total -""" - -import os -import re -import time -import logging -from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction -from datetime import datetime - - -def get_origin_path(file_path): - """Get the origin path from the translation path""" - paths = file_path.split("/") - tidx = paths.index("translations") - opaths = paths[:tidx] - opaths += paths[tidx + 2 :] - return "/".join(opaths) - - -def get_latest_commit_from(file_path, commit): - """Get the latest commit from the specified commit for the specified file""" - command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}" - logging.debug(command) - pipe = os.popen(command) - result = pipe.read() - result = result.split("\n") - if len(result) <= 1: - return None - - logging.debug("Result: %s", result[0]) - - return { - "hash": result[0], - "author_date": datetime.strptime(result[1], "%a, %d %b %Y %H:%M:%S %z"), - "commit_date": datetime.strptime(result[2], "%a, %d %b %Y %H:%M:%S %z"), - "message": result[4:], - } - - -def get_origin_from_trans(origin_path, t_from_head): - """Get the latest origin commit from the translation commit""" - o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"]) - while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]: - o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^") - if o_from_t is not None: - logging.debug("tracked origin commit id: %s", o_from_t["hash"]) - return o_from_t - - -def get_origin_from_trans_smartly(origin_path, t_from_head): - """Get the latest origin commit from the formatted translation commit: - (1) update to commit HASH (TITLE) - (2) Update the translation through commit HASH (TITLE) - """ - # catch flag for 12-bit commit hash - HASH = r'([0-9a-f]{12})' - # pattern 1: contains "update to commit HASH" - pat_update_to = re.compile(rf'update to commit {HASH}') - # pattern 2: contains "Update the translation through commit HASH" - pat_update_translation = re.compile(rf'Update the translation through commit {HASH}') - - origin_commit_hash = None - for line in t_from_head["message"]: - # check if the line matches the first pattern - match = pat_update_to.search(line) - if match: - origin_commit_hash = match.group(1) - break - # check if the line matches the second pattern - match = pat_update_translation.search(line) - if match: - origin_commit_hash = match.group(1) - break - if origin_commit_hash is None: - return None - o_from_t = get_latest_commit_from(origin_path, origin_commit_hash) - if o_from_t is not None: - logging.debug("tracked origin commit id: %s", o_from_t["hash"]) - return o_from_t - - -def get_commits_count_between(opath, commit1, commit2): - """Get the commits count between two commits for the specified file""" - command = f"git log --pretty=format:%H {commit1}...{commit2} -- {opath}" - logging.debug(command) - pipe = os.popen(command) - result = pipe.read().split("\n") - # filter out empty lines - result = list(filter(lambda x: x != "", result)) - return result - - -def pretty_output(commit): - """Pretty print the commit message""" - command = f"git log --pretty='format:%h (\"%s\")' -1 {commit}" - logging.debug(command) - pipe = os.popen(command) - return pipe.read() - - -def valid_commit(commit): - """Check if the commit is valid or not""" - msg = pretty_output(commit) - return "Merge tag" not in msg - -def check_per_file(file_path): - """Check the translation status for the specified file""" - opath = get_origin_path(file_path) - - if not os.path.isfile(opath): - logging.error("Cannot find the origin path for {file_path}") - return - - o_from_head = get_latest_commit_from(opath, "HEAD") - t_from_head = get_latest_commit_from(file_path, "HEAD") - - if o_from_head is None or t_from_head is None: - logging.error("Cannot find the latest commit for %s", file_path) - return - - o_from_t = get_origin_from_trans_smartly(opath, t_from_head) - # notice, o_from_t from get_*_smartly() is always more accurate than from get_*() - if o_from_t is None: - o_from_t = get_origin_from_trans(opath, t_from_head) - - if o_from_t is None: - logging.error("Error: Cannot find the latest origin commit for %s", file_path) - return - - if o_from_head["hash"] == o_from_t["hash"]: - logging.debug("No update needed for %s", file_path) - else: - logging.info(file_path) - commits = get_commits_count_between( - opath, o_from_t["hash"], o_from_head["hash"] - ) - count = 0 - for commit in commits: - if valid_commit(commit): - logging.info("commit %s", pretty_output(commit)) - count += 1 - logging.info("%d commits needs resolving in total\n", count) - - -def valid_locales(locale): - """Check if the locale is valid or not""" - script_path = os.path.dirname(os.path.abspath(__file__)) - linux_path = os.path.join(script_path, "..") - if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"): - raise ArgumentTypeError("Invalid locale: {locale}") - return locale - - -def list_files_with_excluding_folders(folder, exclude_folders, include_suffix): - """List all files with the specified suffix in the folder and its subfolders""" - files = [] - stack = [folder] - - while stack: - pwd = stack.pop() - # filter out the exclude folders - if os.path.basename(pwd) in exclude_folders: - continue - # list all files and folders - for item in os.listdir(pwd): - ab_item = os.path.join(pwd, item) - if os.path.isdir(ab_item): - stack.append(ab_item) - else: - if ab_item.endswith(include_suffix): - files.append(ab_item) - - return files - - -class DmesgFormatter(logging.Formatter): - """Custom dmesg logging formatter""" - def format(self, record): - timestamp = time.time() - formatted_time = f"[{timestamp:>10.6f}]" - log_message = f"{formatted_time} {record.getMessage()}" - return log_message - - -def config_logging(log_level, log_file="checktransupdate.log"): - """configure logging based on the log level""" - # set up the root logger - logger = logging.getLogger() - logger.setLevel(log_level) - - # Create console handler - console_handler = logging.StreamHandler() - console_handler.setLevel(log_level) - - # Create file handler - file_handler = logging.FileHandler(log_file) - file_handler.setLevel(log_level) - - # Create formatter and add it to the handlers - formatter = DmesgFormatter() - console_handler.setFormatter(formatter) - file_handler.setFormatter(formatter) - - # Add the handler to the logger - logger.addHandler(console_handler) - logger.addHandler(file_handler) - - -def main(): - """Main function of the script""" - script_path = os.path.dirname(os.path.abspath(__file__)) - linux_path = os.path.join(script_path, "..") - - parser = ArgumentParser(description="Check the translation update") - parser.add_argument( - "-l", - "--locale", - default="zh_CN", - type=valid_locales, - help="Locale to check when files are not specified", - ) - - parser.add_argument( - "--print-missing-translations", - action=BooleanOptionalAction, - default=True, - help="Print files that do not have translations", - ) - - parser.add_argument( - '--log', - default='INFO', - choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], - help='Set the logging level') - - parser.add_argument( - '--logfile', - default='checktransupdate.log', - help='Set the logging file (default: checktransupdate.log)') - - parser.add_argument( - "files", nargs="*", help="Files to check, if not specified, check all files" - ) - args = parser.parse_args() - - # Configure logging based on the --log argument - log_level = getattr(logging, args.log.upper(), logging.INFO) - config_logging(log_level) - - # Get files related to linux path - files = args.files - if len(files) == 0: - offical_files = list_files_with_excluding_folders( - os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst" - ) - - for file in offical_files: - # split the path into parts - path_parts = file.split(os.sep) - # find the index of the "Documentation" directory - kindex = path_parts.index("Documentation") - # insert the translations and locale after the Documentation directory - new_path_parts = path_parts[:kindex + 1] + ["translations", args.locale] \ - + path_parts[kindex + 1 :] - # join the path parts back together - new_file = os.sep.join(new_path_parts) - if os.path.isfile(new_file): - files.append(new_file) - else: - if args.print_missing_translations: - logging.info(os.path.relpath(os.path.abspath(file), linux_path)) - logging.info("No translation in the locale of %s\n", args.locale) - - files = list(map(lambda x: os.path.relpath(os.path.abspath(x), linux_path), files)) - - # cd to linux root directory - os.chdir(linux_path) - - for file in files: - check_per_file(file) - - -if __name__ == "__main__": - main() diff --git a/tools/docs/checktransupdate.py b/tools/docs/checktransupdate.py new file mode 100755 index 000000000000..e894652369a5 --- /dev/null +++ b/tools/docs/checktransupdate.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +This script helps track the translation status of the documentation +in different locales, e.g., zh_CN. More specially, it uses `git log` +commit to find the latest english commit from the translation commit +(order by author date) and the latest english commits from HEAD. If +differences occur, report the file and commits that need to be updated. + +The usage is as follows: +- tools/docs/checktransupdate.py -l zh_CN +This will print all the files that need to be updated or translated in the zh_CN locale. +- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst +This will only print the status of the specified file. + +The output is something like: +Documentation/dev-tools/kfence.rst +No translation in the locale of zh_CN + +Documentation/translations/zh_CN/dev-tools/testing-overview.rst +commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs") +1 commits needs resolving in total +""" + +import os +import re +import time +import logging +from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction +from datetime import datetime + + +def get_origin_path(file_path): + """Get the origin path from the translation path""" + paths = file_path.split("/") + tidx = paths.index("translations") + opaths = paths[:tidx] + opaths += paths[tidx + 2 :] + return "/".join(opaths) + + +def get_latest_commit_from(file_path, commit): + """Get the latest commit from the specified commit for the specified file""" + command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}" + logging.debug(command) + pipe = os.popen(command) + result = pipe.read() + result = result.split("\n") + if len(result) <= 1: + return None + + logging.debug("Result: %s", result[0]) + + return { + "hash": result[0], + "author_date": datetime.strptime(result[1], "%a, %d %b %Y %H:%M:%S %z"), + "commit_date": datetime.strptime(result[2], "%a, %d %b %Y %H:%M:%S %z"), + "message": result[4:], + } + + +def get_origin_from_trans(origin_path, t_from_head): + """Get the latest origin commit from the translation commit""" + o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"]) + while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]: + o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^") + if o_from_t is not None: + logging.debug("tracked origin commit id: %s", o_from_t["hash"]) + return o_from_t + + +def get_origin_from_trans_smartly(origin_path, t_from_head): + """Get the latest origin commit from the formatted translation commit: + (1) update to commit HASH (TITLE) + (2) Update the translation through commit HASH (TITLE) + """ + # catch flag for 12-bit commit hash + HASH = r'([0-9a-f]{12})' + # pattern 1: contains "update to commit HASH" + pat_update_to = re.compile(rf'update to commit {HASH}') + # pattern 2: contains "Update the translation through commit HASH" + pat_update_translation = re.compile(rf'Update the translation through commit {HASH}') + + origin_commit_hash = None + for line in t_from_head["message"]: + # check if the line matches the first pattern + match = pat_update_to.search(line) + if match: + origin_commit_hash = match.group(1) + break + # check if the line matches the second pattern + match = pat_update_translation.search(line) + if match: + origin_commit_hash = match.group(1) + break + if origin_commit_hash is None: + return None + o_from_t = get_latest_commit_from(origin_path, origin_commit_hash) + if o_from_t is not None: + logging.debug("tracked origin commit id: %s", o_from_t["hash"]) + return o_from_t + + +def get_commits_count_between(opath, commit1, commit2): + """Get the commits count between two commits for the specified file""" + command = f"git log --pretty=format:%H {commit1}...{commit2} -- {opath}" + logging.debug(command) + pipe = os.popen(command) + result = pipe.read().split("\n") + # filter out empty lines + result = list(filter(lambda x: x != "", result)) + return result + + +def pretty_output(commit): + """Pretty print the commit message""" + command = f"git log --pretty='format:%h (\"%s\")' -1 {commit}" + logging.debug(command) + pipe = os.popen(command) + return pipe.read() + + +def valid_commit(commit): + """Check if the commit is valid or not""" + msg = pretty_output(commit) + return "Merge tag" not in msg + +def check_per_file(file_path): + """Check the translation status for the specified file""" + opath = get_origin_path(file_path) + + if not os.path.isfile(opath): + logging.error("Cannot find the origin path for {file_path}") + return + + o_from_head = get_latest_commit_from(opath, "HEAD") + t_from_head = get_latest_commit_from(file_path, "HEAD") + + if o_from_head is None or t_from_head is None: + logging.error("Cannot find the latest commit for %s", file_path) + return + + o_from_t = get_origin_from_trans_smartly(opath, t_from_head) + # notice, o_from_t from get_*_smartly() is always more accurate than from get_*() + if o_from_t is None: + o_from_t = get_origin_from_trans(opath, t_from_head) + + if o_from_t is None: + logging.error("Error: Cannot find the latest origin commit for %s", file_path) + return + + if o_from_head["hash"] == o_from_t["hash"]: + logging.debug("No update needed for %s", file_path) + else: + logging.info(file_path) + commits = get_commits_count_between( + opath, o_from_t["hash"], o_from_head["hash"] + ) + count = 0 + for commit in commits: + if valid_commit(commit): + logging.info("commit %s", pretty_output(commit)) + count += 1 + logging.info("%d commits needs resolving in total\n", count) + + +def valid_locales(locale): + """Check if the locale is valid or not""" + script_path = os.path.dirname(os.path.abspath(__file__)) + linux_path = os.path.join(script_path, "../..") + if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"): + raise ArgumentTypeError("Invalid locale: {locale}") + return locale + + +def list_files_with_excluding_folders(folder, exclude_folders, include_suffix): + """List all files with the specified suffix in the folder and its subfolders""" + files = [] + stack = [folder] + + while stack: + pwd = stack.pop() + # filter out the exclude folders + if os.path.basename(pwd) in exclude_folders: + continue + # list all files and folders + for item in os.listdir(pwd): + ab_item = os.path.join(pwd, item) + if os.path.isdir(ab_item): + stack.append(ab_item) + else: + if ab_item.endswith(include_suffix): + files.append(ab_item) + + return files + + +class DmesgFormatter(logging.Formatter): + """Custom dmesg logging formatter""" + def format(self, record): + timestamp = time.time() + formatted_time = f"[{timestamp:>10.6f}]" + log_message = f"{formatted_time} {record.getMessage()}" + return log_message + + +def config_logging(log_level, log_file="checktransupdate.log"): + """configure logging based on the log level""" + # set up the root logger + logger = logging.getLogger() + logger.setLevel(log_level) + + # Create console handler + console_handler = logging.StreamHandler() + console_handler.setLevel(log_level) + + # Create file handler + file_handler = logging.FileHandler(log_file) + file_handler.setLevel(log_level) + + # Create formatter and add it to the handlers + formatter = DmesgFormatter() + console_handler.setFormatter(formatter) + file_handler.setFormatter(formatter) + + # Add the handler to the logger + logger.addHandler(console_handler) + logger.addHandler(file_handler) + + +def main(): + """Main function of the script""" + script_path = os.path.dirname(os.path.abspath(__file__)) + linux_path = os.path.join(script_path, "../..") + + parser = ArgumentParser(description="Check the translation update") + parser.add_argument( + "-l", + "--locale", + default="zh_CN", + type=valid_locales, + help="Locale to check when files are not specified", + ) + + parser.add_argument( + "--print-missing-translations", + action=BooleanOptionalAction, + default=True, + help="Print files that do not have translations", + ) + + parser.add_argument( + '--log', + default='INFO', + choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], + help='Set the logging level') + + parser.add_argument( + '--logfile', + default='checktransupdate.log', + help='Set the logging file (default: checktransupdate.log)') + + parser.add_argument( + "files", nargs="*", help="Files to check, if not specified, check all files" + ) + args = parser.parse_args() + + # Configure logging based on the --log argument + log_level = getattr(logging, args.log.upper(), logging.INFO) + config_logging(log_level) + + # Get files related to linux path + files = args.files + if len(files) == 0: + offical_files = list_files_with_excluding_folders( + os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst" + ) + + for file in offical_files: + # split the path into parts + path_parts = file.split(os.sep) + # find the index of the "Documentation" directory + kindex = path_parts.index("Documentation") + # insert the translations and locale after the Documentation directory + new_path_parts = path_parts[:kindex + 1] + ["translations", args.locale] \ + + path_parts[kindex + 1 :] + # join the path parts back together + new_file = os.sep.join(new_path_parts) + if os.path.isfile(new_file): + files.append(new_file) + else: + if args.print_missing_translations: + logging.info(os.path.relpath(os.path.abspath(file), linux_path)) + logging.info("No translation in the locale of %s\n", args.locale) + + files = list(map(lambda x: os.path.relpath(os.path.abspath(x), linux_path), files)) + + # cd to linux root directory + os.chdir(linux_path) + + for file in files: + check_per_file(file) + + +if __name__ == "__main__": + main() -- cgit v1.2.3 From eaae0ad9720428cd9e2bf9a40fedf137db95184f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 11:16:28 -0600 Subject: docs: move scripts/documentation-file-ref-check to tools/docs Add this script to the growing collection of documentation tools. Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- Documentation/Makefile | 4 +- MAINTAINERS | 2 - scripts/documentation-file-ref-check | 245 -------------------------------- tools/docs/documentation-file-ref-check | 245 ++++++++++++++++++++++++++++++++ tools/docs/sphinx-pre-install | 2 +- 5 files changed, 248 insertions(+), 250 deletions(-) delete mode 100755 scripts/documentation-file-ref-check create mode 100755 tools/docs/documentation-file-ref-check (limited to 'scripts') diff --git a/Documentation/Makefile b/Documentation/Makefile index 65d184eab739..1476aa1701ce 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -8,7 +8,7 @@ subdir- := devicetree/bindings ifneq ($(MAKECMDGOALS),cleandocs) # Check for broken documentation file references ifeq ($(CONFIG_WARN_MISSING_DOCUMENTS),y) -$(shell $(srctree)/scripts/documentation-file-ref-check --warn) +$(shell $(srctree)/tools/docs/documentation-file-ref-check --warn) endif # Check for broken ABI files @@ -78,7 +78,7 @@ htmldocs-redirects: $(srctree)/Documentation/.renames.txt @tools/docs/gen-redirects.py --output $(BUILDDIR) < $< refcheckdocs: - $(Q)cd $(srctree);scripts/documentation-file-ref-check + $(Q)cd $(srctree); tools/docs/documentation-file-ref-check cleandocs: $(Q)rm -rf $(BUILDDIR) diff --git a/MAINTAINERS b/MAINTAINERS index cd187b9f1dc2..fc1da52433b6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7411,7 +7411,6 @@ S: Maintained P: Documentation/doc-guide/maintainer-profile.rst T: git git://git.lwn.net/linux.git docs-next F: Documentation/ -F: scripts/documentation-file-ref-check F: scripts/get_abi.py F: scripts/kernel-doc* F: scripts/lib/abi/* @@ -7451,7 +7450,6 @@ M: Mauro Carvalho Chehab L: linux-doc@vger.kernel.org S: Maintained F: Documentation/sphinx/parse-headers.pl -F: scripts/documentation-file-ref-check F: tools/docs/sphinx-pre-install DOCUMENTATION/ITALIAN diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check deleted file mode 100755 index 408b1dbe7884..000000000000 --- a/scripts/documentation-file-ref-check +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 -# -# Treewide grep for references to files under Documentation, and report -# non-existing files in stderr. - -use warnings; -use strict; -use Getopt::Long qw(:config no_auto_abbrev); - -# NOTE: only add things here when the file was gone, but the text wants -# to mention a past documentation file, for example, to give credits for -# the original work. -my %false_positives = ( - "Documentation/scsi/scsi_mid_low_api.rst" => "Documentation/Configure.help", - "drivers/vhost/vhost.c" => "Documentation/virtual/lguest/lguest.c", -); - -my $scriptname = $0; -$scriptname =~ s,.*/([^/]+/),$1,; - -# Parse arguments -my $help = 0; -my $fix = 0; -my $warn = 0; - -if (! -e ".git") { - printf "Warning: can't check if file exists, as this is not a git tree\n"; - exit 0; -} - -GetOptions( - 'fix' => \$fix, - 'warn' => \$warn, - 'h|help|usage' => \$help, -); - -if ($help != 0) { - print "$scriptname [--help] [--fix]\n"; - exit -1; -} - -# Step 1: find broken references -print "Finding broken references. This may take a while... " if ($fix); - -my %broken_ref; - -my $doc_fix = 0; - -open IN, "git grep ':doc:\`' Documentation/|" - or die "Failed to run git grep"; -while () { - next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,); - next if (m,sphinx/,); - - my $file = $1; - my $d = $1; - my $doc_ref = $2; - - my $f = $doc_ref; - - $d =~ s,(.*/).*,$1,; - $f =~ s,.*\<([^\>]+)\>,$1,; - - if ($f =~ m,^/,) { - $f = "$f.rst"; - $f =~ s,^/,Documentation/,; - } else { - $f = "$d$f.rst"; - } - - next if (grep -e, glob("$f")); - - if ($fix && !$doc_fix) { - print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n"; - } - $doc_fix++; - - print STDERR "$file: :doc:`$doc_ref`\n"; -} -close IN; - -open IN, "git grep 'Documentation/'|" - or die "Failed to run git grep"; -while () { - next if (!m/^([^:]+):(.*)/); - - my $f = $1; - my $ln = $2; - - # On linux-next, discard the Next/ directory - next if ($f =~ m,^Next/,); - - # Makefiles and scripts contain nasty expressions to parse docs - next if ($f =~ m/Makefile/ || $f =~ m/\.(sh|py|pl|~|rej|org|orig)$/); - - # It doesn't make sense to parse hidden files - next if ($f =~ m#/\.#); - - # Skip this script - next if ($f eq $scriptname); - - # Ignore the dir where documentation will be built - next if ($ln =~ m,\b(\S*)Documentation/output,); - - if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),) { - my $prefix = $1; - my $ref = $2; - my $base = $2; - my $extra = $3; - - # some file references are like: - # /usr/src/linux/Documentation/DMA-{API,mapping}.txt - # For now, ignore them - next if ($extra =~ m/^{/); - - # Remove footnotes at the end like: - # Documentation/devicetree/dt-object-internal.txt[1] - $ref =~ s/(txt|rst)\[\d+]$/$1/; - - # Remove ending ']' without any '[' - $ref =~ s/\].*// if (!($ref =~ m/\[/)); - - # Remove puntuation marks at the end - $ref =~ s/[\,\.]+$//; - - my $fulref = "$prefix$ref"; - - $fulref =~ s/^(\ 1) { - print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n"; - foreach my $j (@find) { - $j =~ s,^./,,; - print STDERR " $j\n"; - } - } else { - $f = $find[0]; - $f =~ s,^./,,; - print "INFO: Replacing $ref to $f\n"; - foreach my $j (qx(git grep -l $ref)) { - qx(sed "s\@$ref\@$f\@g" -i $j); - } - } -} diff --git a/tools/docs/documentation-file-ref-check b/tools/docs/documentation-file-ref-check new file mode 100755 index 000000000000..0cad42f6943b --- /dev/null +++ b/tools/docs/documentation-file-ref-check @@ -0,0 +1,245 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 +# +# Treewide grep for references to files under Documentation, and report +# non-existing files in stderr. + +use warnings; +use strict; +use Getopt::Long qw(:config no_auto_abbrev); + +# NOTE: only add things here when the file was gone, but the text wants +# to mention a past documentation file, for example, to give credits for +# the original work. +my %false_positives = ( + "Documentation/scsi/scsi_mid_low_api.rst" => "Documentation/Configure.help", + "drivers/vhost/vhost.c" => "Documentation/virtual/lguest/lguest.c", +); + +my $scriptname = $0; +$scriptname =~ s,tools/docs/([^/]+/),$1,; + +# Parse arguments +my $help = 0; +my $fix = 0; +my $warn = 0; + +if (! -e ".git") { + printf "Warning: can't check if file exists, as this is not a git tree\n"; + exit 0; +} + +GetOptions( + 'fix' => \$fix, + 'warn' => \$warn, + 'h|help|usage' => \$help, +); + +if ($help != 0) { + print "$scriptname [--help] [--fix]\n"; + exit -1; +} + +# Step 1: find broken references +print "Finding broken references. This may take a while... " if ($fix); + +my %broken_ref; + +my $doc_fix = 0; + +open IN, "git grep ':doc:\`' Documentation/|" + or die "Failed to run git grep"; +while () { + next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,); + next if (m,sphinx/,); + + my $file = $1; + my $d = $1; + my $doc_ref = $2; + + my $f = $doc_ref; + + $d =~ s,(.*/).*,$1,; + $f =~ s,.*\<([^\>]+)\>,$1,; + + if ($f =~ m,^/,) { + $f = "$f.rst"; + $f =~ s,^/,Documentation/,; + } else { + $f = "$d$f.rst"; + } + + next if (grep -e, glob("$f")); + + if ($fix && !$doc_fix) { + print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n"; + } + $doc_fix++; + + print STDERR "$file: :doc:`$doc_ref`\n"; +} +close IN; + +open IN, "git grep 'Documentation/'|" + or die "Failed to run git grep"; +while () { + next if (!m/^([^:]+):(.*)/); + + my $f = $1; + my $ln = $2; + + # On linux-next, discard the Next/ directory + next if ($f =~ m,^Next/,); + + # Makefiles and scripts contain nasty expressions to parse docs + next if ($f =~ m/Makefile/ || $f =~ m/\.(sh|py|pl|~|rej|org|orig)$/); + + # It doesn't make sense to parse hidden files + next if ($f =~ m#/\.#); + + # Skip this script + next if ($f eq $scriptname); + + # Ignore the dir where documentation will be built + next if ($ln =~ m,\b(\S*)Documentation/output,); + + if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),) { + my $prefix = $1; + my $ref = $2; + my $base = $2; + my $extra = $3; + + # some file references are like: + # /usr/src/linux/Documentation/DMA-{API,mapping}.txt + # For now, ignore them + next if ($extra =~ m/^{/); + + # Remove footnotes at the end like: + # Documentation/devicetree/dt-object-internal.txt[1] + $ref =~ s/(txt|rst)\[\d+]$/$1/; + + # Remove ending ']' without any '[' + $ref =~ s/\].*// if (!($ref =~ m/\[/)); + + # Remove puntuation marks at the end + $ref =~ s/[\,\.]+$//; + + my $fulref = "$prefix$ref"; + + $fulref =~ s/^(\ 1) { + print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n"; + foreach my $j (@find) { + $j =~ s,^./,,; + print STDERR " $j\n"; + } + } else { + $f = $find[0]; + $f =~ s,^./,,; + print "INFO: Replacing $ref to $f\n"; + foreach my $j (qx(git grep -l $ref)) { + qx(sed "s\@$ref\@$f\@g" -i $j); + } + } +} diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install index 698989584b6a..647e1f60357f 100755 --- a/tools/docs/sphinx-pre-install +++ b/tools/docs/sphinx-pre-install @@ -313,7 +313,7 @@ class MissingCheckers(AncillaryMethods): Right now, we still need Perl for doc build, as it is required by some tools called at docs or kernel build time, like: - scripts/documentation-file-ref-check + tools/docs/documentation-file-ref-check Also, checkpatch is on Perl. """ -- cgit v1.2.3 From a5dd93016f20912ec141d569b897e1fc2d94977d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 11:33:09 -0600 Subject: docs: move get_abi.py to tools/docs Move this tool out of scripts/ to join the other documentation tools; fix up a couple of erroneous references in the process. It's worth noting that this script will fail badly unless one has a PYTHONPATH referencing scripts/lib/abi. Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- Documentation/Kconfig | 2 +- Documentation/Makefile | 2 +- Documentation/sphinx/kernel_abi.py | 2 +- MAINTAINERS | 1 - scripts/get_abi.py | 214 ------------------------------------- tools/docs/get_abi.py | 214 +++++++++++++++++++++++++++++++++++++ 6 files changed, 217 insertions(+), 218 deletions(-) delete mode 100755 scripts/get_abi.py create mode 100755 tools/docs/get_abi.py (limited to 'scripts') diff --git a/Documentation/Kconfig b/Documentation/Kconfig index 3a0e7ac0c4e3..8b6c4b84b218 100644 --- a/Documentation/Kconfig +++ b/Documentation/Kconfig @@ -19,7 +19,7 @@ config WARN_ABI_ERRORS described at Documentation/ABI/README. Yet, as they're manually written, it would be possible that some of those files would have errors that would break them for being parsed by - scripts/get_abi.pl. Add a check to verify them. + tools/docs/get_abi.py. Add a check to verify them. If unsure, select 'N'. diff --git a/Documentation/Makefile b/Documentation/Makefile index 1476aa1701ce..734ec9c9a62c 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -13,7 +13,7 @@ endif # Check for broken ABI files ifeq ($(CONFIG_WARN_ABI_ERRORS),y) -$(shell $(srctree)/scripts/get_abi.py --dir $(srctree)/Documentation/ABI validate) +$(shell $(srctree)/tools/docs/get_abi.py --dir $(srctree)/Documentation/ABI validate) endif endif diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index 4c4375201b9e..32e39fb8bc3b 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -14,7 +14,7 @@ :license: GPL Version 2, June 1991 see Linux/COPYING for details. The ``kernel-abi`` (:py:class:`KernelCmd`) directive calls the - scripts/get_abi.py script to parse the Kernel ABI files. + AbiParser class to parse the Kernel ABI files. Overview of directive's argument and options. diff --git a/MAINTAINERS b/MAINTAINERS index fc1da52433b6..5c632cd3902c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7411,7 +7411,6 @@ S: Maintained P: Documentation/doc-guide/maintainer-profile.rst T: git git://git.lwn.net/linux.git docs-next F: Documentation/ -F: scripts/get_abi.py F: scripts/kernel-doc* F: scripts/lib/abi/* F: scripts/lib/kdoc/* diff --git a/scripts/get_abi.py b/scripts/get_abi.py deleted file mode 100755 index 7ce4748a46d2..000000000000 --- a/scripts/get_abi.py +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=R0903 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Parse ABI documentation and produce results from it. -""" - -import argparse -import logging -import os -import sys - -# Import Python modules - -LIB_DIR = "lib/abi" -SRC_DIR = os.path.dirname(os.path.realpath(__file__)) - -sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) - -from abi_parser import AbiParser # pylint: disable=C0413 -from abi_regex import AbiRegex # pylint: disable=C0413 -from helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413 -from system_symbols import SystemSymbols # pylint: disable=C0413 - -# Command line classes - - -REST_DESC = """ -Produce output in ReST format. - -The output is done on two sections: - -- Symbols: show all parsed symbols in alphabetic order; -- Files: cross reference the content of each file with the symbols on it. -""" - -class AbiRest: - """Initialize an argparse subparser for rest output""" - - def __init__(self, subparsers): - """Initialize argparse subparsers""" - - parser = subparsers.add_parser("rest", - formatter_class=argparse.RawTextHelpFormatter, - description=REST_DESC) - - parser.add_argument("--enable-lineno", action="store_true", - help="enable lineno") - parser.add_argument("--raw", action="store_true", - help="output text as contained in the ABI files. " - "It not used, output will contain dynamically" - " generated cross references when possible.") - parser.add_argument("--no-file", action="store_true", - help="Don't the files section") - parser.add_argument("--show-hints", help="Show-hints") - - parser.set_defaults(func=self.run) - - def run(self, args): - """Run subparser""" - - parser = AbiParser(args.dir, debug=args.debug) - parser.parse_abi() - parser.check_issues() - - for t in parser.doc(args.raw, not args.no_file): - if args.enable_lineno: - print (f".. LINENO {t[1]}#{t[2]}\n\n") - - print(t[0]) - -class AbiValidate: - """Initialize an argparse subparser for ABI validation""" - - def __init__(self, subparsers): - """Initialize argparse subparsers""" - - parser = subparsers.add_parser("validate", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="list events") - - parser.set_defaults(func=self.run) - - def run(self, args): - """Run subparser""" - - parser = AbiParser(args.dir, debug=args.debug) - parser.parse_abi() - parser.check_issues() - - -class AbiSearch: - """Initialize an argparse subparser for ABI search""" - - def __init__(self, subparsers): - """Initialize argparse subparsers""" - - parser = subparsers.add_parser("search", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="Search ABI using a regular expression") - - parser.add_argument("expression", - help="Case-insensitive search pattern for the ABI symbol") - - parser.set_defaults(func=self.run) - - def run(self, args): - """Run subparser""" - - parser = AbiParser(args.dir, debug=args.debug) - parser.parse_abi() - parser.search_symbols(args.expression) - -UNDEFINED_DESC=""" -Check undefined ABIs on local machine. - -Read sysfs devnodes and check if the devnodes there are defined inside -ABI documentation. - -The search logic tries to minimize the number of regular expressions to -search per each symbol. - -By default, it runs on a single CPU, as Python support for CPU threads -is still experimental, and multi-process runs on Python is very slow. - -On experimental tests, if the number of ABI symbols to search per devnode -is contained on a limit of ~150 regular expressions, using a single CPU -is a lot faster than using multiple processes. However, if the number of -regular expressions to check is at the order of ~30000, using multiple -CPUs speeds up the check. -""" - -class AbiUndefined: - """ - Initialize an argparse subparser for logic to check undefined ABI at - the current machine's sysfs - """ - - def __init__(self, subparsers): - """Initialize argparse subparsers""" - - parser = subparsers.add_parser("undefined", - formatter_class=argparse.RawTextHelpFormatter, - description=UNDEFINED_DESC) - - parser.add_argument("-S", "--sysfs-dir", default="/sys", - help="directory where sysfs is mounted") - parser.add_argument("-s", "--search-string", - help="search string regular expression to limit symbol search") - parser.add_argument("-H", "--show-hints", action="store_true", - help="Hints about definitions for missing ABI symbols.") - parser.add_argument("-j", "--jobs", "--max-workers", type=int, default=1, - help="If bigger than one, enables multiprocessing.") - parser.add_argument("-c", "--max-chunk-size", type=int, default=50, - help="Maximum number of chunk size") - parser.add_argument("-f", "--found", action="store_true", - help="Also show found items. " - "Helpful to debug the parser."), - parser.add_argument("-d", "--dry-run", action="store_true", - help="Don't actually search for undefined. " - "Helpful to debug the parser."), - - parser.set_defaults(func=self.run) - - def run(self, args): - """Run subparser""" - - abi = AbiRegex(args.dir, debug=args.debug, - search_string=args.search_string) - - abi_symbols = SystemSymbols(abi=abi, hints=args.show_hints, - sysfs=args.sysfs_dir) - - abi_symbols.check_undefined_symbols(dry_run=args.dry_run, - found=args.found, - max_workers=args.jobs, - chunk_size=args.max_chunk_size) - - -def main(): - """Main program""" - - parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) - - parser.add_argument("-d", "--debug", type=int, default=0, help="debug level") - parser.add_argument("-D", "--dir", default=ABI_DIR, help=DEBUG_HELP) - - subparsers = parser.add_subparsers() - - AbiRest(subparsers) - AbiValidate(subparsers) - AbiSearch(subparsers) - AbiUndefined(subparsers) - - args = parser.parse_args() - - if args.debug: - level = logging.DEBUG - else: - level = logging.INFO - - logging.basicConfig(level=level, format="[%(levelname)s] %(message)s") - - if "func" in args: - args.func(args) - else: - sys.exit(f"Please specify a valid command for {sys.argv[0]}") - - -# Call main method -if __name__ == "__main__": - main() diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py new file mode 100755 index 000000000000..7ce4748a46d2 --- /dev/null +++ b/tools/docs/get_abi.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +# pylint: disable=R0903 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. +""" + +import argparse +import logging +import os +import sys + +# Import Python modules + +LIB_DIR = "lib/abi" +SRC_DIR = os.path.dirname(os.path.realpath(__file__)) + +sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) + +from abi_parser import AbiParser # pylint: disable=C0413 +from abi_regex import AbiRegex # pylint: disable=C0413 +from helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413 +from system_symbols import SystemSymbols # pylint: disable=C0413 + +# Command line classes + + +REST_DESC = """ +Produce output in ReST format. + +The output is done on two sections: + +- Symbols: show all parsed symbols in alphabetic order; +- Files: cross reference the content of each file with the symbols on it. +""" + +class AbiRest: + """Initialize an argparse subparser for rest output""" + + def __init__(self, subparsers): + """Initialize argparse subparsers""" + + parser = subparsers.add_parser("rest", + formatter_class=argparse.RawTextHelpFormatter, + description=REST_DESC) + + parser.add_argument("--enable-lineno", action="store_true", + help="enable lineno") + parser.add_argument("--raw", action="store_true", + help="output text as contained in the ABI files. " + "It not used, output will contain dynamically" + " generated cross references when possible.") + parser.add_argument("--no-file", action="store_true", + help="Don't the files section") + parser.add_argument("--show-hints", help="Show-hints") + + parser.set_defaults(func=self.run) + + def run(self, args): + """Run subparser""" + + parser = AbiParser(args.dir, debug=args.debug) + parser.parse_abi() + parser.check_issues() + + for t in parser.doc(args.raw, not args.no_file): + if args.enable_lineno: + print (f".. LINENO {t[1]}#{t[2]}\n\n") + + print(t[0]) + +class AbiValidate: + """Initialize an argparse subparser for ABI validation""" + + def __init__(self, subparsers): + """Initialize argparse subparsers""" + + parser = subparsers.add_parser("validate", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="list events") + + parser.set_defaults(func=self.run) + + def run(self, args): + """Run subparser""" + + parser = AbiParser(args.dir, debug=args.debug) + parser.parse_abi() + parser.check_issues() + + +class AbiSearch: + """Initialize an argparse subparser for ABI search""" + + def __init__(self, subparsers): + """Initialize argparse subparsers""" + + parser = subparsers.add_parser("search", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Search ABI using a regular expression") + + parser.add_argument("expression", + help="Case-insensitive search pattern for the ABI symbol") + + parser.set_defaults(func=self.run) + + def run(self, args): + """Run subparser""" + + parser = AbiParser(args.dir, debug=args.debug) + parser.parse_abi() + parser.search_symbols(args.expression) + +UNDEFINED_DESC=""" +Check undefined ABIs on local machine. + +Read sysfs devnodes and check if the devnodes there are defined inside +ABI documentation. + +The search logic tries to minimize the number of regular expressions to +search per each symbol. + +By default, it runs on a single CPU, as Python support for CPU threads +is still experimental, and multi-process runs on Python is very slow. + +On experimental tests, if the number of ABI symbols to search per devnode +is contained on a limit of ~150 regular expressions, using a single CPU +is a lot faster than using multiple processes. However, if the number of +regular expressions to check is at the order of ~30000, using multiple +CPUs speeds up the check. +""" + +class AbiUndefined: + """ + Initialize an argparse subparser for logic to check undefined ABI at + the current machine's sysfs + """ + + def __init__(self, subparsers): + """Initialize argparse subparsers""" + + parser = subparsers.add_parser("undefined", + formatter_class=argparse.RawTextHelpFormatter, + description=UNDEFINED_DESC) + + parser.add_argument("-S", "--sysfs-dir", default="/sys", + help="directory where sysfs is mounted") + parser.add_argument("-s", "--search-string", + help="search string regular expression to limit symbol search") + parser.add_argument("-H", "--show-hints", action="store_true", + help="Hints about definitions for missing ABI symbols.") + parser.add_argument("-j", "--jobs", "--max-workers", type=int, default=1, + help="If bigger than one, enables multiprocessing.") + parser.add_argument("-c", "--max-chunk-size", type=int, default=50, + help="Maximum number of chunk size") + parser.add_argument("-f", "--found", action="store_true", + help="Also show found items. " + "Helpful to debug the parser."), + parser.add_argument("-d", "--dry-run", action="store_true", + help="Don't actually search for undefined. " + "Helpful to debug the parser."), + + parser.set_defaults(func=self.run) + + def run(self, args): + """Run subparser""" + + abi = AbiRegex(args.dir, debug=args.debug, + search_string=args.search_string) + + abi_symbols = SystemSymbols(abi=abi, hints=args.show_hints, + sysfs=args.sysfs_dir) + + abi_symbols.check_undefined_symbols(dry_run=args.dry_run, + found=args.found, + max_workers=args.jobs, + chunk_size=args.max_chunk_size) + + +def main(): + """Main program""" + + parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) + + parser.add_argument("-d", "--debug", type=int, default=0, help="debug level") + parser.add_argument("-D", "--dir", default=ABI_DIR, help=DEBUG_HELP) + + subparsers = parser.add_subparsers() + + AbiRest(subparsers) + AbiValidate(subparsers) + AbiSearch(subparsers) + AbiUndefined(subparsers) + + args = parser.parse_args() + + if args.debug: + level = logging.DEBUG + else: + level = logging.INFO + + logging.basicConfig(level=level, format="[%(levelname)s] %(message)s") + + if "func" in args: + args.func(args) + else: + sys.exit(f"Please specify a valid command for {sys.argv[0]}") + + +# Call main method +if __name__ == "__main__": + main() -- cgit v1.2.3 From f1c2db1f145b5c609ae651d229713e3c7422785a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 12:21:50 -0600 Subject: docs: move test_doc_build.py to tools/docs Add this tool to tools/docs. Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- Documentation/doc-guide/sphinx.rst | 2 +- scripts/test_doc_build.py | 513 ------------------------------------- tools/docs/test_doc_build.py | 513 +++++++++++++++++++++++++++++++++++++ 3 files changed, 514 insertions(+), 514 deletions(-) delete mode 100755 scripts/test_doc_build.py create mode 100755 tools/docs/test_doc_build.py (limited to 'scripts') diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index 932f68c53075..51c370260f3b 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -149,7 +149,7 @@ a venv with it with, and install minimal requirements with:: A more comprehensive test can be done by using: - scripts/test_doc_build.py + tools/docs/test_doc_build.py Such script create one Python venv per supported version, optionally building documentation for a range of Sphinx versions. diff --git a/scripts/test_doc_build.py b/scripts/test_doc_build.py deleted file mode 100755 index 47b4606569f9..000000000000 --- a/scripts/test_doc_build.py +++ /dev/null @@ -1,513 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab -# -# pylint: disable=R0903,R0912,R0913,R0914,R0917,C0301 - -""" -Install minimal supported requirements for different Sphinx versions -and optionally test the build. -""" - -import argparse -import asyncio -import os.path -import shutil -import sys -import time -import subprocess - -# Minimal python version supported by the building system. - -PYTHON = os.path.basename(sys.executable) - -min_python_bin = None - -for i in range(9, 13): - p = f"python3.{i}" - if shutil.which(p): - min_python_bin = p - break - -if not min_python_bin: - min_python_bin = PYTHON - -# Starting from 8.0, Python 3.9 is not supported anymore. -PYTHON_VER_CHANGES = {(8, 0, 0): PYTHON} - -DEFAULT_VERSIONS_TO_TEST = [ - (3, 4, 3), # Minimal supported version - (5, 3, 0), # CentOS Stream 9 / AlmaLinux 9 - (6, 1, 1), # Debian 12 - (7, 2, 1), # openSUSE Leap 15.6 - (7, 2, 6), # Ubuntu 24.04 LTS - (7, 4, 7), # Ubuntu 24.10 - (7, 3, 0), # openSUSE Tumbleweed - (8, 1, 3), # Fedora 42 - (8, 2, 3) # Latest version - covers rolling distros -] - -# Sphinx versions to be installed and their incremental requirements -SPHINX_REQUIREMENTS = { - # Oldest versions we support for each package required by Sphinx 3.4.3 - (3, 4, 3): { - "docutils": "0.16", - "alabaster": "0.7.12", - "babel": "2.8.0", - "certifi": "2020.6.20", - "docutils": "0.16", - "idna": "2.10", - "imagesize": "1.2.0", - "Jinja2": "2.11.2", - "MarkupSafe": "1.1.1", - "packaging": "20.4", - "Pygments": "2.6.1", - "PyYAML": "5.1", - "requests": "2.24.0", - "snowballstemmer": "2.0.0", - "sphinxcontrib-applehelp": "1.0.2", - "sphinxcontrib-devhelp": "1.0.2", - "sphinxcontrib-htmlhelp": "1.0.3", - "sphinxcontrib-jsmath": "1.0.1", - "sphinxcontrib-qthelp": "1.0.3", - "sphinxcontrib-serializinghtml": "1.1.4", - "urllib3": "1.25.9", - }, - - # Update package dependencies to a more modern base. The goal here - # is to avoid to many incremental changes for the next entries - (3, 5, 0): { - "alabaster": "0.7.13", - "babel": "2.17.0", - "certifi": "2025.6.15", - "idna": "3.10", - "imagesize": "1.4.1", - "packaging": "25.0", - "Pygments": "2.8.1", - "requests": "2.32.4", - "snowballstemmer": "3.0.1", - "sphinxcontrib-applehelp": "1.0.4", - "sphinxcontrib-htmlhelp": "2.0.1", - "sphinxcontrib-serializinghtml": "1.1.5", - "urllib3": "2.0.0", - }, - - # Starting from here, ensure all docutils versions are covered with - # supported Sphinx versions. Other packages are upgraded only when - # required by pip - (4, 0, 0): { - "PyYAML": "5.1", - }, - (4, 1, 0): { - "docutils": "0.17", - "Pygments": "2.19.1", - "Jinja2": "3.0.3", - "MarkupSafe": "2.0", - }, - (4, 3, 0): {}, - (4, 4, 0): {}, - (4, 5, 0): { - "docutils": "0.17.1", - }, - (5, 0, 0): {}, - (5, 1, 0): {}, - (5, 2, 0): { - "docutils": "0.18", - "Jinja2": "3.1.2", - "MarkupSafe": "2.0", - "PyYAML": "5.3.1", - }, - (5, 3, 0): { - "docutils": "0.18.1", - }, - (6, 0, 0): {}, - (6, 1, 0): {}, - (6, 2, 0): { - "PyYAML": "5.4.1", - }, - (7, 0, 0): {}, - (7, 1, 0): {}, - (7, 2, 0): { - "docutils": "0.19", - "PyYAML": "6.0.1", - "sphinxcontrib-serializinghtml": "1.1.9", - }, - (7, 2, 6): { - "docutils": "0.20", - }, - (7, 3, 0): { - "alabaster": "0.7.14", - "PyYAML": "6.0.1", - "tomli": "2.0.1", - }, - (7, 4, 0): { - "docutils": "0.20.1", - "PyYAML": "6.0.1", - }, - (8, 0, 0): { - "docutils": "0.21", - }, - (8, 1, 0): { - "docutils": "0.21.1", - "PyYAML": "6.0.1", - "sphinxcontrib-applehelp": "1.0.7", - "sphinxcontrib-devhelp": "1.0.6", - "sphinxcontrib-htmlhelp": "2.0.6", - "sphinxcontrib-qthelp": "1.0.6", - }, - (8, 2, 0): { - "docutils": "0.21.2", - "PyYAML": "6.0.1", - "sphinxcontrib-serializinghtml": "1.1.9", - }, -} - - -class AsyncCommands: - """Excecute command synchronously""" - - def __init__(self, fp=None): - - self.stdout = None - self.stderr = None - self.output = None - self.fp = fp - - def log(self, out, verbose, is_info=True): - out = out.removesuffix('\n') - - if verbose: - if is_info: - print(out) - else: - print(out, file=sys.stderr) - - if self.fp: - self.fp.write(out + "\n") - - async def _read(self, stream, verbose, is_info): - """Ancillary routine to capture while displaying""" - - while stream is not None: - line = await stream.readline() - if line: - out = line.decode("utf-8", errors="backslashreplace") - self.log(out, verbose, is_info) - if is_info: - self.stdout += out - else: - self.stderr += out - else: - break - - async def run(self, cmd, capture_output=False, check=False, - env=None, verbose=True): - - """ - Execute an arbitrary command, handling errors. - - Please notice that this class is not thread safe - """ - - self.stdout = "" - self.stderr = "" - - self.log("$ " + " ".join(cmd), verbose) - - proc = await asyncio.create_subprocess_exec(cmd[0], - *cmd[1:], - env=env, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE) - - # Handle input and output in realtime - await asyncio.gather( - self._read(proc.stdout, verbose, True), - self._read(proc.stderr, verbose, False), - ) - - await proc.wait() - - if check and proc.returncode > 0: - raise subprocess.CalledProcessError(returncode=proc.returncode, - cmd=" ".join(cmd), - output=self.stdout, - stderr=self.stderr) - - if capture_output: - if proc.returncode > 0: - self.log(f"Error {proc.returncode}", verbose=True, is_info=False) - return "" - - return self.output - - ret = subprocess.CompletedProcess(args=cmd, - returncode=proc.returncode, - stdout=self.stdout, - stderr=self.stderr) - - return ret - - -class SphinxVenv: - """ - Installs Sphinx on one virtual env per Sphinx version with a minimal - set of dependencies, adjusting them to each specific version. - """ - - def __init__(self): - """Initialize instance variables""" - - self.built_time = {} - self.first_run = True - - async def _handle_version(self, args, fp, - cur_ver, cur_requirements, python_bin): - """Handle a single Sphinx version""" - - cmd = AsyncCommands(fp) - - ver = ".".join(map(str, cur_ver)) - - if not self.first_run and args.wait_input and args.build: - ret = input("Press Enter to continue or 'a' to abort: ").strip().lower() - if ret == "a": - print("Aborted.") - sys.exit() - else: - self.first_run = False - - venv_dir = f"Sphinx_{ver}" - req_file = f"requirements_{ver}.txt" - - cmd.log(f"\nSphinx {ver} with {python_bin}", verbose=True) - - # Create venv - await cmd.run([python_bin, "-m", "venv", venv_dir], - verbose=args.verbose, check=True) - pip = os.path.join(venv_dir, "bin/pip") - - # Create install list - reqs = [] - for pkg, verstr in cur_requirements.items(): - reqs.append(f"{pkg}=={verstr}") - - reqs.append(f"Sphinx=={ver}") - - await cmd.run([pip, "install"] + reqs, check=True, verbose=args.verbose) - - # Freeze environment - result = await cmd.run([pip, "freeze"], verbose=False, check=True) - - # Pip install succeeded. Write requirements file - if args.req_file: - with open(req_file, "w", encoding="utf-8") as fp: - fp.write(result.stdout) - - if args.build: - start_time = time.time() - - # Prepare a venv environment - env = os.environ.copy() - bin_dir = os.path.join(venv_dir, "bin") - env["PATH"] = bin_dir + ":" + env["PATH"] - env["VIRTUAL_ENV"] = venv_dir - if "PYTHONHOME" in env: - del env["PYTHONHOME"] - - # Test doc build - await cmd.run(["make", "cleandocs"], env=env, check=True) - make = ["make"] - - if args.output: - sphinx_build = os.path.realpath(f"{bin_dir}/sphinx-build") - make += [f"O={args.output}", f"SPHINXBUILD={sphinx_build}"] - - if args.make_args: - make += args.make_args - - make += args.targets - - if args.verbose: - cmd.log(f". {bin_dir}/activate", verbose=True) - await cmd.run(make, env=env, check=True, verbose=True) - if args.verbose: - cmd.log("deactivate", verbose=True) - - end_time = time.time() - elapsed_time = end_time - start_time - hours, minutes = divmod(elapsed_time, 3600) - minutes, seconds = divmod(minutes, 60) - - hours = int(hours) - minutes = int(minutes) - seconds = int(seconds) - - self.built_time[ver] = f"{hours:02d}:{minutes:02d}:{seconds:02d}" - - cmd.log(f"Finished doc build for Sphinx {ver}. Elapsed time: {self.built_time[ver]}", verbose=True) - - async def run(self, args): - """ - Navigate though multiple Sphinx versions, handling each of them - on a loop. - """ - - if args.log: - fp = open(args.log, "w", encoding="utf-8") - if not args.verbose: - args.verbose = False - else: - fp = None - if not args.verbose: - args.verbose = True - - cur_requirements = {} - python_bin = min_python_bin - - vers = set(SPHINX_REQUIREMENTS.keys()) | set(args.versions) - - for cur_ver in sorted(vers): - if cur_ver in SPHINX_REQUIREMENTS: - new_reqs = SPHINX_REQUIREMENTS[cur_ver] - cur_requirements.update(new_reqs) - - if cur_ver in PYTHON_VER_CHANGES: # pylint: disable=R1715 - python_bin = PYTHON_VER_CHANGES[cur_ver] - - if cur_ver not in args.versions: - continue - - if args.min_version: - if cur_ver < args.min_version: - continue - - if args.max_version: - if cur_ver > args.max_version: - break - - await self._handle_version(args, fp, cur_ver, cur_requirements, - python_bin) - - if args.build: - cmd = AsyncCommands(fp) - cmd.log("\nSummary:", verbose=True) - for ver, elapsed_time in sorted(self.built_time.items()): - cmd.log(f"\tSphinx {ver} elapsed time: {elapsed_time}", - verbose=True) - - if fp: - fp.close() - -def parse_version(ver_str): - """Convert a version string into a tuple.""" - - return tuple(map(int, ver_str.split("."))) - - -DEFAULT_VERS = " - " -DEFAULT_VERS += "\n - ".join(map(lambda v: f"{v[0]}.{v[1]}.{v[2]}", - DEFAULT_VERSIONS_TO_TEST)) - -SCRIPT = os.path.relpath(__file__) - -DESCRIPTION = f""" -This tool allows creating Python virtual environments for different -Sphinx versions that are supported by the Linux Kernel build system. - -Besides creating the virtual environment, it can also test building -the documentation using "make htmldocs" (and/or other doc targets). - -If called without "--versions" argument, it covers the versions shipped -on major distros, plus the lowest supported version: - -{DEFAULT_VERS} - -A typical usage is to run: - - {SCRIPT} -m -l sphinx_builds.log - -This will create one virtual env for the default version set and run -"make htmldocs" for each version, creating a log file with the -excecuted commands on it. - -NOTE: The build time can be very long, specially on old versions. Also, there -is a known bug with Sphinx version 6.0.x: each subprocess uses a lot of -memory. That, together with "-jauto" may cause OOM killer to cause -failures at the doc generation. To minimize the risk, you may use the -"-a" command line parameter to constrain the built directories and/or -reduce the number of threads from "-jauto" to, for instance, "-j4": - - {SCRIPT} -m -V 6.0.1 -a "SPHINXDIRS=process" "SPHINXOPTS='-j4'" - -""" - -MAKE_TARGETS = [ - "htmldocs", - "texinfodocs", - "infodocs", - "latexdocs", - "pdfdocs", - "epubdocs", - "xmldocs", -] - -async def main(): - """Main program""" - - parser = argparse.ArgumentParser(description=DESCRIPTION, - formatter_class=argparse.RawDescriptionHelpFormatter) - - ver_group = parser.add_argument_group("Version range options") - - ver_group.add_argument('-V', '--versions', nargs="*", - default=DEFAULT_VERSIONS_TO_TEST,type=parse_version, - help='Sphinx versions to test') - ver_group.add_argument('--min-version', "--min", type=parse_version, - help='Sphinx minimal version') - ver_group.add_argument('--max-version', "--max", type=parse_version, - help='Sphinx maximum version') - ver_group.add_argument('-f', '--full', action='store_true', - help='Add all Sphinx (major,minor) supported versions to the version range') - - build_group = parser.add_argument_group("Build options") - - build_group.add_argument('-b', '--build', action='store_true', - help='Build documentation') - build_group.add_argument('-a', '--make-args', nargs="*", - help='extra arguments for make, like SPHINXDIRS=netlink/specs', - ) - build_group.add_argument('-t', '--targets', nargs="+", choices=MAKE_TARGETS, - default=[MAKE_TARGETS[0]], - help="make build targets. Default: htmldocs.") - build_group.add_argument("-o", '--output', - help="output directory for the make O=OUTPUT") - - other_group = parser.add_argument_group("Other options") - - other_group.add_argument('-r', '--req-file', action='store_true', - help='write a requirements.txt file') - other_group.add_argument('-l', '--log', - help='Log command output on a file') - other_group.add_argument('-v', '--verbose', action='store_true', - help='Verbose all commands') - other_group.add_argument('-i', '--wait-input', action='store_true', - help='Wait for an enter before going to the next version') - - args = parser.parse_args() - - if not args.make_args: - args.make_args = [] - - sphinx_versions = sorted(list(SPHINX_REQUIREMENTS.keys())) - - if args.full: - args.versions += list(SPHINX_REQUIREMENTS.keys()) - - venv = SphinxVenv() - await venv.run(args) - - -# Call main method -if __name__ == "__main__": - asyncio.run(main()) diff --git a/tools/docs/test_doc_build.py b/tools/docs/test_doc_build.py new file mode 100755 index 000000000000..47b4606569f9 --- /dev/null +++ b/tools/docs/test_doc_build.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab +# +# pylint: disable=R0903,R0912,R0913,R0914,R0917,C0301 + +""" +Install minimal supported requirements for different Sphinx versions +and optionally test the build. +""" + +import argparse +import asyncio +import os.path +import shutil +import sys +import time +import subprocess + +# Minimal python version supported by the building system. + +PYTHON = os.path.basename(sys.executable) + +min_python_bin = None + +for i in range(9, 13): + p = f"python3.{i}" + if shutil.which(p): + min_python_bin = p + break + +if not min_python_bin: + min_python_bin = PYTHON + +# Starting from 8.0, Python 3.9 is not supported anymore. +PYTHON_VER_CHANGES = {(8, 0, 0): PYTHON} + +DEFAULT_VERSIONS_TO_TEST = [ + (3, 4, 3), # Minimal supported version + (5, 3, 0), # CentOS Stream 9 / AlmaLinux 9 + (6, 1, 1), # Debian 12 + (7, 2, 1), # openSUSE Leap 15.6 + (7, 2, 6), # Ubuntu 24.04 LTS + (7, 4, 7), # Ubuntu 24.10 + (7, 3, 0), # openSUSE Tumbleweed + (8, 1, 3), # Fedora 42 + (8, 2, 3) # Latest version - covers rolling distros +] + +# Sphinx versions to be installed and their incremental requirements +SPHINX_REQUIREMENTS = { + # Oldest versions we support for each package required by Sphinx 3.4.3 + (3, 4, 3): { + "docutils": "0.16", + "alabaster": "0.7.12", + "babel": "2.8.0", + "certifi": "2020.6.20", + "docutils": "0.16", + "idna": "2.10", + "imagesize": "1.2.0", + "Jinja2": "2.11.2", + "MarkupSafe": "1.1.1", + "packaging": "20.4", + "Pygments": "2.6.1", + "PyYAML": "5.1", + "requests": "2.24.0", + "snowballstemmer": "2.0.0", + "sphinxcontrib-applehelp": "1.0.2", + "sphinxcontrib-devhelp": "1.0.2", + "sphinxcontrib-htmlhelp": "1.0.3", + "sphinxcontrib-jsmath": "1.0.1", + "sphinxcontrib-qthelp": "1.0.3", + "sphinxcontrib-serializinghtml": "1.1.4", + "urllib3": "1.25.9", + }, + + # Update package dependencies to a more modern base. The goal here + # is to avoid to many incremental changes for the next entries + (3, 5, 0): { + "alabaster": "0.7.13", + "babel": "2.17.0", + "certifi": "2025.6.15", + "idna": "3.10", + "imagesize": "1.4.1", + "packaging": "25.0", + "Pygments": "2.8.1", + "requests": "2.32.4", + "snowballstemmer": "3.0.1", + "sphinxcontrib-applehelp": "1.0.4", + "sphinxcontrib-htmlhelp": "2.0.1", + "sphinxcontrib-serializinghtml": "1.1.5", + "urllib3": "2.0.0", + }, + + # Starting from here, ensure all docutils versions are covered with + # supported Sphinx versions. Other packages are upgraded only when + # required by pip + (4, 0, 0): { + "PyYAML": "5.1", + }, + (4, 1, 0): { + "docutils": "0.17", + "Pygments": "2.19.1", + "Jinja2": "3.0.3", + "MarkupSafe": "2.0", + }, + (4, 3, 0): {}, + (4, 4, 0): {}, + (4, 5, 0): { + "docutils": "0.17.1", + }, + (5, 0, 0): {}, + (5, 1, 0): {}, + (5, 2, 0): { + "docutils": "0.18", + "Jinja2": "3.1.2", + "MarkupSafe": "2.0", + "PyYAML": "5.3.1", + }, + (5, 3, 0): { + "docutils": "0.18.1", + }, + (6, 0, 0): {}, + (6, 1, 0): {}, + (6, 2, 0): { + "PyYAML": "5.4.1", + }, + (7, 0, 0): {}, + (7, 1, 0): {}, + (7, 2, 0): { + "docutils": "0.19", + "PyYAML": "6.0.1", + "sphinxcontrib-serializinghtml": "1.1.9", + }, + (7, 2, 6): { + "docutils": "0.20", + }, + (7, 3, 0): { + "alabaster": "0.7.14", + "PyYAML": "6.0.1", + "tomli": "2.0.1", + }, + (7, 4, 0): { + "docutils": "0.20.1", + "PyYAML": "6.0.1", + }, + (8, 0, 0): { + "docutils": "0.21", + }, + (8, 1, 0): { + "docutils": "0.21.1", + "PyYAML": "6.0.1", + "sphinxcontrib-applehelp": "1.0.7", + "sphinxcontrib-devhelp": "1.0.6", + "sphinxcontrib-htmlhelp": "2.0.6", + "sphinxcontrib-qthelp": "1.0.6", + }, + (8, 2, 0): { + "docutils": "0.21.2", + "PyYAML": "6.0.1", + "sphinxcontrib-serializinghtml": "1.1.9", + }, +} + + +class AsyncCommands: + """Excecute command synchronously""" + + def __init__(self, fp=None): + + self.stdout = None + self.stderr = None + self.output = None + self.fp = fp + + def log(self, out, verbose, is_info=True): + out = out.removesuffix('\n') + + if verbose: + if is_info: + print(out) + else: + print(out, file=sys.stderr) + + if self.fp: + self.fp.write(out + "\n") + + async def _read(self, stream, verbose, is_info): + """Ancillary routine to capture while displaying""" + + while stream is not None: + line = await stream.readline() + if line: + out = line.decode("utf-8", errors="backslashreplace") + self.log(out, verbose, is_info) + if is_info: + self.stdout += out + else: + self.stderr += out + else: + break + + async def run(self, cmd, capture_output=False, check=False, + env=None, verbose=True): + + """ + Execute an arbitrary command, handling errors. + + Please notice that this class is not thread safe + """ + + self.stdout = "" + self.stderr = "" + + self.log("$ " + " ".join(cmd), verbose) + + proc = await asyncio.create_subprocess_exec(cmd[0], + *cmd[1:], + env=env, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE) + + # Handle input and output in realtime + await asyncio.gather( + self._read(proc.stdout, verbose, True), + self._read(proc.stderr, verbose, False), + ) + + await proc.wait() + + if check and proc.returncode > 0: + raise subprocess.CalledProcessError(returncode=proc.returncode, + cmd=" ".join(cmd), + output=self.stdout, + stderr=self.stderr) + + if capture_output: + if proc.returncode > 0: + self.log(f"Error {proc.returncode}", verbose=True, is_info=False) + return "" + + return self.output + + ret = subprocess.CompletedProcess(args=cmd, + returncode=proc.returncode, + stdout=self.stdout, + stderr=self.stderr) + + return ret + + +class SphinxVenv: + """ + Installs Sphinx on one virtual env per Sphinx version with a minimal + set of dependencies, adjusting them to each specific version. + """ + + def __init__(self): + """Initialize instance variables""" + + self.built_time = {} + self.first_run = True + + async def _handle_version(self, args, fp, + cur_ver, cur_requirements, python_bin): + """Handle a single Sphinx version""" + + cmd = AsyncCommands(fp) + + ver = ".".join(map(str, cur_ver)) + + if not self.first_run and args.wait_input and args.build: + ret = input("Press Enter to continue or 'a' to abort: ").strip().lower() + if ret == "a": + print("Aborted.") + sys.exit() + else: + self.first_run = False + + venv_dir = f"Sphinx_{ver}" + req_file = f"requirements_{ver}.txt" + + cmd.log(f"\nSphinx {ver} with {python_bin}", verbose=True) + + # Create venv + await cmd.run([python_bin, "-m", "venv", venv_dir], + verbose=args.verbose, check=True) + pip = os.path.join(venv_dir, "bin/pip") + + # Create install list + reqs = [] + for pkg, verstr in cur_requirements.items(): + reqs.append(f"{pkg}=={verstr}") + + reqs.append(f"Sphinx=={ver}") + + await cmd.run([pip, "install"] + reqs, check=True, verbose=args.verbose) + + # Freeze environment + result = await cmd.run([pip, "freeze"], verbose=False, check=True) + + # Pip install succeeded. Write requirements file + if args.req_file: + with open(req_file, "w", encoding="utf-8") as fp: + fp.write(result.stdout) + + if args.build: + start_time = time.time() + + # Prepare a venv environment + env = os.environ.copy() + bin_dir = os.path.join(venv_dir, "bin") + env["PATH"] = bin_dir + ":" + env["PATH"] + env["VIRTUAL_ENV"] = venv_dir + if "PYTHONHOME" in env: + del env["PYTHONHOME"] + + # Test doc build + await cmd.run(["make", "cleandocs"], env=env, check=True) + make = ["make"] + + if args.output: + sphinx_build = os.path.realpath(f"{bin_dir}/sphinx-build") + make += [f"O={args.output}", f"SPHINXBUILD={sphinx_build}"] + + if args.make_args: + make += args.make_args + + make += args.targets + + if args.verbose: + cmd.log(f". {bin_dir}/activate", verbose=True) + await cmd.run(make, env=env, check=True, verbose=True) + if args.verbose: + cmd.log("deactivate", verbose=True) + + end_time = time.time() + elapsed_time = end_time - start_time + hours, minutes = divmod(elapsed_time, 3600) + minutes, seconds = divmod(minutes, 60) + + hours = int(hours) + minutes = int(minutes) + seconds = int(seconds) + + self.built_time[ver] = f"{hours:02d}:{minutes:02d}:{seconds:02d}" + + cmd.log(f"Finished doc build for Sphinx {ver}. Elapsed time: {self.built_time[ver]}", verbose=True) + + async def run(self, args): + """ + Navigate though multiple Sphinx versions, handling each of them + on a loop. + """ + + if args.log: + fp = open(args.log, "w", encoding="utf-8") + if not args.verbose: + args.verbose = False + else: + fp = None + if not args.verbose: + args.verbose = True + + cur_requirements = {} + python_bin = min_python_bin + + vers = set(SPHINX_REQUIREMENTS.keys()) | set(args.versions) + + for cur_ver in sorted(vers): + if cur_ver in SPHINX_REQUIREMENTS: + new_reqs = SPHINX_REQUIREMENTS[cur_ver] + cur_requirements.update(new_reqs) + + if cur_ver in PYTHON_VER_CHANGES: # pylint: disable=R1715 + python_bin = PYTHON_VER_CHANGES[cur_ver] + + if cur_ver not in args.versions: + continue + + if args.min_version: + if cur_ver < args.min_version: + continue + + if args.max_version: + if cur_ver > args.max_version: + break + + await self._handle_version(args, fp, cur_ver, cur_requirements, + python_bin) + + if args.build: + cmd = AsyncCommands(fp) + cmd.log("\nSummary:", verbose=True) + for ver, elapsed_time in sorted(self.built_time.items()): + cmd.log(f"\tSphinx {ver} elapsed time: {elapsed_time}", + verbose=True) + + if fp: + fp.close() + +def parse_version(ver_str): + """Convert a version string into a tuple.""" + + return tuple(map(int, ver_str.split("."))) + + +DEFAULT_VERS = " - " +DEFAULT_VERS += "\n - ".join(map(lambda v: f"{v[0]}.{v[1]}.{v[2]}", + DEFAULT_VERSIONS_TO_TEST)) + +SCRIPT = os.path.relpath(__file__) + +DESCRIPTION = f""" +This tool allows creating Python virtual environments for different +Sphinx versions that are supported by the Linux Kernel build system. + +Besides creating the virtual environment, it can also test building +the documentation using "make htmldocs" (and/or other doc targets). + +If called without "--versions" argument, it covers the versions shipped +on major distros, plus the lowest supported version: + +{DEFAULT_VERS} + +A typical usage is to run: + + {SCRIPT} -m -l sphinx_builds.log + +This will create one virtual env for the default version set and run +"make htmldocs" for each version, creating a log file with the +excecuted commands on it. + +NOTE: The build time can be very long, specially on old versions. Also, there +is a known bug with Sphinx version 6.0.x: each subprocess uses a lot of +memory. That, together with "-jauto" may cause OOM killer to cause +failures at the doc generation. To minimize the risk, you may use the +"-a" command line parameter to constrain the built directories and/or +reduce the number of threads from "-jauto" to, for instance, "-j4": + + {SCRIPT} -m -V 6.0.1 -a "SPHINXDIRS=process" "SPHINXOPTS='-j4'" + +""" + +MAKE_TARGETS = [ + "htmldocs", + "texinfodocs", + "infodocs", + "latexdocs", + "pdfdocs", + "epubdocs", + "xmldocs", +] + +async def main(): + """Main program""" + + parser = argparse.ArgumentParser(description=DESCRIPTION, + formatter_class=argparse.RawDescriptionHelpFormatter) + + ver_group = parser.add_argument_group("Version range options") + + ver_group.add_argument('-V', '--versions', nargs="*", + default=DEFAULT_VERSIONS_TO_TEST,type=parse_version, + help='Sphinx versions to test') + ver_group.add_argument('--min-version', "--min", type=parse_version, + help='Sphinx minimal version') + ver_group.add_argument('--max-version', "--max", type=parse_version, + help='Sphinx maximum version') + ver_group.add_argument('-f', '--full', action='store_true', + help='Add all Sphinx (major,minor) supported versions to the version range') + + build_group = parser.add_argument_group("Build options") + + build_group.add_argument('-b', '--build', action='store_true', + help='Build documentation') + build_group.add_argument('-a', '--make-args', nargs="*", + help='extra arguments for make, like SPHINXDIRS=netlink/specs', + ) + build_group.add_argument('-t', '--targets', nargs="+", choices=MAKE_TARGETS, + default=[MAKE_TARGETS[0]], + help="make build targets. Default: htmldocs.") + build_group.add_argument("-o", '--output', + help="output directory for the make O=OUTPUT") + + other_group = parser.add_argument_group("Other options") + + other_group.add_argument('-r', '--req-file', action='store_true', + help='write a requirements.txt file') + other_group.add_argument('-l', '--log', + help='Log command output on a file') + other_group.add_argument('-v', '--verbose', action='store_true', + help='Verbose all commands') + other_group.add_argument('-i', '--wait-input', action='store_true', + help='Wait for an enter before going to the next version') + + args = parser.parse_args() + + if not args.make_args: + args.make_args = [] + + sphinx_versions = sorted(list(SPHINX_REQUIREMENTS.keys())) + + if args.full: + args.versions += list(SPHINX_REQUIREMENTS.keys()) + + venv = SphinxVenv() + await venv.run(args) + + +# Call main method +if __name__ == "__main__": + asyncio.run(main()) -- cgit v1.2.3 From 184414c6a6cac78ad6c46037a8afad5c9f04fba5 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 13:03:08 -0600 Subject: docs: move find-unused-docs.sh to tools/docs ...and update references accordingly. Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- Documentation/doc-guide/contributing.rst | 2 +- .../translations/zh_CN/doc-guide/contributing.rst | 2 +- scripts/find-unused-docs.sh | 62 ---------------------- tools/docs/find-unused-docs.sh | 62 ++++++++++++++++++++++ 4 files changed, 64 insertions(+), 64 deletions(-) delete mode 100755 scripts/find-unused-docs.sh create mode 100755 tools/docs/find-unused-docs.sh (limited to 'scripts') diff --git a/Documentation/doc-guide/contributing.rst b/Documentation/doc-guide/contributing.rst index 662c7a840cd5..f8047e633113 100644 --- a/Documentation/doc-guide/contributing.rst +++ b/Documentation/doc-guide/contributing.rst @@ -152,7 +152,7 @@ generate links to that documentation. Adding ``kernel-doc`` directives to the documentation to bring those comments in can help the community derive the full value of the work that has gone into creating them. -The ``scripts/find-unused-docs.sh`` tool can be used to find these +The ``tools/docs/find-unused-docs.sh`` tool can be used to find these overlooked comments. Note that the most value comes from pulling in the documentation for diff --git a/Documentation/translations/zh_CN/doc-guide/contributing.rst b/Documentation/translations/zh_CN/doc-guide/contributing.rst index 394a13b438b0..b0c8ba782b16 100644 --- a/Documentation/translations/zh_CN/doc-guide/contributing.rst +++ b/Documentation/translations/zh_CN/doc-guide/contributing.rst @@ -124,7 +124,7 @@ C代码编译器发出的警告常常会被视为误报,从而导致出现了 这使得这些信息更难找到,例如使Sphinx无法生成指向该文档的链接。将 ``kernel-doc`` 指令添加到文档中以引入这些注释可以帮助社区获得为编写注释所做工作的全部价值。 -``scripts/find-unused-docs.sh`` 工具可以用来找到这些被忽略的评论。 +``tools/docs/find-unused-docs.sh`` 工具可以用来找到这些被忽略的评论。 请注意,将导出的函数和数据结构引入文档是最有价值的。许多子系统还具有供内部 使用的kernel-doc注释;除非这些注释放在专门针对相关子系统开发人员的文档中, diff --git a/scripts/find-unused-docs.sh b/scripts/find-unused-docs.sh deleted file mode 100755 index d6d397fbf917..000000000000 --- a/scripts/find-unused-docs.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -# (c) 2017, Jonathan Corbet -# sayli karnik -# -# This script detects files with kernel-doc comments for exported functions -# that are not included in documentation. -# -# usage: Run 'scripts/find-unused-docs.sh directory' from top level of kernel -# tree. -# -# example: $scripts/find-unused-docs.sh drivers/scsi -# -# Licensed under the terms of the GNU GPL License - -if ! [ -d "Documentation" ]; then - echo "Run from top level of kernel tree" - exit 1 -fi - -if [ "$#" -ne 1 ]; then - echo "Usage: scripts/find-unused-docs.sh directory" - exit 1 -fi - -if ! [ -d "$1" ]; then - echo "Directory $1 doesn't exist" - exit 1 -fi - -cd "$( dirname "${BASH_SOURCE[0]}" )" -cd .. - -cd Documentation/ - -echo "The following files contain kerneldoc comments for exported functions \ -that are not used in the formatted documentation" - -# FILES INCLUDED - -files_included=($(grep -rHR ".. kernel-doc" --include \*.rst | cut -d " " -f 3)) - -declare -A FILES_INCLUDED - -for each in "${files_included[@]}"; do - FILES_INCLUDED[$each]="$each" - done - -cd .. - -# FILES NOT INCLUDED - -for file in `find $1 -name '*.c'`; do - - if [[ ${FILES_INCLUDED[$file]+_} ]]; then - continue; - fi - str=$(PYTHONDONTWRITEBYTECODE=1 scripts/kernel-doc -export "$file" 2>/dev/null) - if [[ -n "$str" ]]; then - echo "$file" - fi - done - diff --git a/tools/docs/find-unused-docs.sh b/tools/docs/find-unused-docs.sh new file mode 100755 index 000000000000..05552dbda5bc --- /dev/null +++ b/tools/docs/find-unused-docs.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# (c) 2017, Jonathan Corbet +# sayli karnik +# +# This script detects files with kernel-doc comments for exported functions +# that are not included in documentation. +# +# usage: Run 'tools/docs/find-unused-docs.sh directory' from top level of kernel +# tree. +# +# example: $tools/docs/find-unused-docs.sh drivers/scsi +# +# Licensed under the terms of the GNU GPL License + +if ! [ -d "Documentation" ]; then + echo "Run from top level of kernel tree" + exit 1 +fi + +if [ "$#" -ne 1 ]; then + echo "Usage: tools/docs/find-unused-docs.sh directory" + exit 1 +fi + +if ! [ -d "$1" ]; then + echo "Directory $1 doesn't exist" + exit 1 +fi + +cd "$( dirname "${BASH_SOURCE[0]}" )" +cd .. + +cd Documentation/ + +echo "The following files contain kerneldoc comments for exported functions \ +that are not used in the formatted documentation" + +# FILES INCLUDED + +files_included=($(grep -rHR ".. kernel-doc" --include \*.rst | cut -d " " -f 3)) + +declare -A FILES_INCLUDED + +for each in "${files_included[@]}"; do + FILES_INCLUDED[$each]="$each" + done + +cd .. + +# FILES NOT INCLUDED + +for file in `find $1 -name '*.c'`; do + + if [[ ${FILES_INCLUDED[$file]+_} ]]; then + continue; + fi + str=$(PYTHONDONTWRITEBYTECODE=1 scripts/kernel-doc -export "$file" 2>/dev/null) + if [[ -n "$str" ]]; then + echo "$file" + fi + done + -- cgit v1.2.3 From 683e8cbaba7f0baf94a774ee17a1c0ddf3b243b4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 13:08:22 -0600 Subject: docs: remove kernel-doc.pl We've been using the Python version and nobody has missed this one. All credit goes to Mauro Carvalho Chehab for creating the replacement. Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- scripts/kernel-doc.pl | 2439 ------------------------------------------------- 1 file changed, 2439 deletions(-) delete mode 100755 scripts/kernel-doc.pl (limited to 'scripts') diff --git a/scripts/kernel-doc.pl b/scripts/kernel-doc.pl deleted file mode 100755 index 5db23cbf4eb2..000000000000 --- a/scripts/kernel-doc.pl +++ /dev/null @@ -1,2439 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 -# vim: softtabstop=4 - -use warnings; -use strict; - -## Copyright (c) 1998 Michael Zucchi, All Rights Reserved ## -## Copyright (C) 2000, 1 Tim Waugh ## -## Copyright (C) 2001 Simon Huggins ## -## Copyright (C) 2005-2012 Randy Dunlap ## -## Copyright (C) 2012 Dan Luedtke ## -## ## -## #define enhancements by Armin Kuster ## -## Copyright (c) 2000 MontaVista Software, Inc. ## -# -# Copyright (C) 2022 Tomasz Warniełło (POD) - -use Pod::Usage qw/pod2usage/; - -=head1 NAME - -kernel-doc - Print formatted kernel documentation to stdout - -=head1 SYNOPSIS - - kernel-doc [-h] [-v] [-Werror] [-Wall] [-Wreturn] [-Wshort-desc[ription]] [-Wcontents-before-sections] - [ -man | - -rst [-enable-lineno] | - -none - ] - [ - -export | - -internal | - [-function NAME] ... | - [-nosymbol NAME] ... - ] - [-no-doc-sections] - [-export-file FILE] ... - FILE ... - -Run `kernel-doc -h` for details. - -=head1 DESCRIPTION - -Read C language source or header FILEs, extract embedded documentation comments, -and print formatted documentation to standard output. - -The documentation comments are identified by the "/**" opening comment mark. - -See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. - -=cut - -# more perldoc at the end of the file - -## init lots of data - -my $errors = 0; -my $warnings = 0; -my $anon_struct_union = 0; - -# match expressions used to find embedded type information -my $type_constant = '\b``([^\`]+)``\b'; -my $type_constant2 = '\%([-_*\w]+)'; -my $type_func = '(\w+)\(\)'; -my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; -my $type_param_ref = '([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; -my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params -my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params -my $type_env = '(\$\w+)'; -my $type_enum = '\&(enum\s*([_\w]+))'; -my $type_struct = '\&(struct\s*([_\w]+))'; -my $type_typedef = '\&(typedef\s*([_\w]+))'; -my $type_union = '\&(union\s*([_\w]+))'; -my $type_member = '\&([_\w]+)(\.|->)([_\w]+)'; -my $type_fallback = '\&([_\w]+)'; -my $type_member_func = $type_member . '\(\)'; - -# Output conversion substitutions. -# One for each output format - -# these are pretty rough -my @highlights_man = ( - [$type_constant, "\$1"], - [$type_constant2, "\$1"], - [$type_func, "\\\\fB\$1\\\\fP"], - [$type_enum, "\\\\fI\$1\\\\fP"], - [$type_struct, "\\\\fI\$1\\\\fP"], - [$type_typedef, "\\\\fI\$1\\\\fP"], - [$type_union, "\\\\fI\$1\\\\fP"], - [$type_param, "\\\\fI\$1\\\\fP"], - [$type_param_ref, "\\\\fI\$1\$2\\\\fP"], - [$type_member, "\\\\fI\$1\$2\$3\\\\fP"], - [$type_fallback, "\\\\fI\$1\\\\fP"] - ); -my $blankline_man = ""; - -# rst-mode -my @highlights_rst = ( - [$type_constant, "``\$1``"], - [$type_constant2, "``\$1``"], - - # Note: need to escape () to avoid func matching later - [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"], - [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"], - [$type_fp_param, "**\$1\\\\(\\\\)**"], - [$type_fp_param2, "**\$1\\\\(\\\\)**"], - [$type_func, "\$1()"], - [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"], - [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"], - [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"], - [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"], - - # in rst this can refer to any type - [$type_fallback, "\\:c\\:type\\:`\$1`"], - [$type_param_ref, "**\$1\$2**"] - ); -my $blankline_rst = "\n"; - -# read arguments -if ($#ARGV == -1) { - pod2usage( - -message => "No arguments!\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); -} - -my $kernelversion; - -my $dohighlight = ""; - -my $verbose = 0; -my $Werror = 0; -my $Wreturn = 0; -my $Wshort_desc = 0; -my $output_mode = "rst"; -my $output_preformatted = 0; -my $no_doc_sections = 0; -my $enable_lineno = 0; -my @highlights = @highlights_rst; -my $blankline = $blankline_rst; -my $modulename = "Kernel API"; - -use constant { - OUTPUT_ALL => 0, # output all symbols and doc sections - OUTPUT_INCLUDE => 1, # output only specified symbols - OUTPUT_EXPORTED => 2, # output exported symbols - OUTPUT_INTERNAL => 3, # output non-exported symbols -}; -my $output_selection = OUTPUT_ALL; -my $show_not_found = 0; # No longer used - -my @export_file_list; - -my @build_time; -if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) && - (my $seconds = `date -d "${ENV{'KBUILD_BUILD_TIMESTAMP'}}" +%s`) ne '') { - @build_time = gmtime($seconds); -} else { - @build_time = localtime; -} - -my $man_date = ('January', 'February', 'March', 'April', 'May', 'June', - 'July', 'August', 'September', 'October', - 'November', 'December')[$build_time[4]] . - " " . ($build_time[5]+1900); - -# Essentially these are globals. -# They probably want to be tidied up, made more localised or something. -# CAVEAT EMPTOR! Some of the others I localised may not want to be, which -# could cause "use of undefined value" or other bugs. -my ($function, %function_table, %parametertypes, $declaration_purpose); -my %nosymbol_table = (); -my $declaration_start_line; -my ($type, $declaration_name, $return_type); -my ($newsection, $newcontents, $prototype, $brcount); - -if (defined($ENV{'KBUILD_VERBOSE'}) && $ENV{'KBUILD_VERBOSE'} =~ '1') { - $verbose = 1; -} - -if (defined($ENV{'KCFLAGS'})) { - my $kcflags = "$ENV{'KCFLAGS'}"; - - if ($kcflags =~ /(\s|^)-Werror(\s|$)/) { - $Werror = 1; - } -} - -# reading this variable is for backwards compat just in case -# someone was calling it with the variable from outside the -# kernel's build system -if (defined($ENV{'KDOC_WERROR'})) { - $Werror = "$ENV{'KDOC_WERROR'}"; -} -# other environment variables are converted to command-line -# arguments in cmd_checkdoc in the build system - -# Generated docbook code is inserted in a template at a point where -# docbook v3.1 requires a non-zero sequence of RefEntry's; see: -# https://www.oasis-open.org/docbook/documentation/reference/html/refentry.html -# We keep track of number of generated entries and generate a dummy -# if needs be to ensure the expanded template can be postprocessed -# into html. -my $section_counter = 0; - -my $lineprefix=""; - -# Parser states -use constant { - STATE_NORMAL => 0, # normal code - STATE_NAME => 1, # looking for function name - STATE_BODY_MAYBE => 2, # body - or maybe more description - STATE_BODY => 3, # the body of the comment - STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line - STATE_PROTO => 5, # scanning prototype - STATE_DOCBLOCK => 6, # documentation block - STATE_INLINE => 7, # gathering doc outside main block -}; -my $state; -my $leading_space; - -# Inline documentation state -use constant { - STATE_INLINE_NA => 0, # not applicable ($state != STATE_INLINE) - STATE_INLINE_NAME => 1, # looking for member name (@foo:) - STATE_INLINE_TEXT => 2, # looking for member documentation - STATE_INLINE_END => 3, # done - STATE_INLINE_ERROR => 4, # error - Comment without header was found. - # Spit a warning as it's not - # proper kernel-doc and ignore the rest. -}; -my $inline_doc_state; - -#declaration types: can be -# 'function', 'struct', 'union', 'enum', 'typedef' -my $decl_type; - -# Name of the kernel-doc identifier for non-DOC markups -my $identifier; - -my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start. -my $doc_end = '\*/'; -my $doc_com = '\s*\*\s*'; -my $doc_com_body = '\s*\* ?'; -my $doc_decl = $doc_com . '(\w+)'; -# @params and a strictly limited set of supported section names -# Specifically: -# Match @word: -# @...: -# @{section-name}: -# while trying to not match literal block starts like "example::" -# -my $doc_sect = $doc_com . - '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$'; -my $doc_content = $doc_com_body . '(.*)'; -my $doc_block = $doc_com . 'DOC:\s*(.*)?'; -my $doc_inline_start = '^\s*/\*\*\s*$'; -my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; -my $doc_inline_end = '^\s*\*/\s*$'; -my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; -my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; -my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*;'; -my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; -my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; - -my %parameterdescs; -my %parameterdesc_start_lines; -my @parameterlist; -my %sections; -my @sectionlist; -my %section_start_lines; -my $sectcheck; -my $struct_actual; - -my $contents = ""; -my $new_start_line = 0; - -# the canonical section names. see also $doc_sect above. -my $section_default = "Description"; # default section -my $section_intro = "Introduction"; -my $section = $section_default; -my $section_context = "Context"; -my $section_return = "Return"; - -my $undescribed = "-- undescribed --"; - -reset_state(); - -while ($ARGV[0] =~ m/^--?(.*)/) { - my $cmd = $1; - shift @ARGV; - if ($cmd eq "man") { - $output_mode = "man"; - @highlights = @highlights_man; - $blankline = $blankline_man; - } elsif ($cmd eq "rst") { - $output_mode = "rst"; - @highlights = @highlights_rst; - $blankline = $blankline_rst; - } elsif ($cmd eq "none") { - $output_mode = "none"; - } elsif ($cmd eq "module") { # not needed for XML, inherits from calling document - $modulename = shift @ARGV; - } elsif ($cmd eq "function") { # to only output specific functions - $output_selection = OUTPUT_INCLUDE; - $function = shift @ARGV; - $function_table{$function} = 1; - } elsif ($cmd eq "nosymbol") { # Exclude specific symbols - my $symbol = shift @ARGV; - $nosymbol_table{$symbol} = 1; - } elsif ($cmd eq "export") { # only exported symbols - $output_selection = OUTPUT_EXPORTED; - %function_table = (); - } elsif ($cmd eq "internal") { # only non-exported symbols - $output_selection = OUTPUT_INTERNAL; - %function_table = (); - } elsif ($cmd eq "export-file") { - my $file = shift @ARGV; - push(@export_file_list, $file); - } elsif ($cmd eq "v") { - $verbose = 1; - } elsif ($cmd eq "Werror") { - $Werror = 1; - } elsif ($cmd eq "Wreturn") { - $Wreturn = 1; - } elsif ($cmd eq "Wshort-desc" or $cmd eq "Wshort-description") { - $Wshort_desc = 1; - } elsif ($cmd eq "Wall") { - $Wreturn = 1; - $Wshort_desc = 1; - } elsif (($cmd eq "h") || ($cmd eq "help")) { - pod2usage(-exitval => 0, -verbose => 2); - } elsif ($cmd eq 'no-doc-sections') { - $no_doc_sections = 1; - } elsif ($cmd eq 'enable-lineno') { - $enable_lineno = 1; - } elsif ($cmd eq 'show-not-found') { - $show_not_found = 1; # A no-op but don't fail - } else { - # Unknown argument - pod2usage( - -message => "Argument unknown!\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); - } - if ($#ARGV < 0){ - pod2usage( - -message => "FILE argument missing\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); - } -} - -# continue execution near EOF; - -sub findprog($) -{ - foreach(split(/:/, $ENV{PATH})) { - return "$_/$_[0]" if(-x "$_/$_[0]"); - } -} - -# get kernel version from env -sub get_kernel_version() { - my $version = 'unknown kernel version'; - - if (defined($ENV{'KERNELVERSION'})) { - $version = $ENV{'KERNELVERSION'}; - } - return $version; -} - -# -sub print_lineno { - my $lineno = shift; - if ($enable_lineno && defined($lineno)) { - print ".. LINENO " . $lineno . "\n"; - } -} - -sub emit_warning { - my $location = shift; - my $msg = shift; - print STDERR "$location: warning: $msg"; - ++$warnings; -} -## -# dumps section contents to arrays/hashes intended for that purpose. -# -sub dump_section { - my $file = shift; - my $name = shift; - my $contents = join "\n", @_; - - if ($name =~ m/$type_param/) { - $name = $1; - $parameterdescs{$name} = $contents; - $sectcheck = $sectcheck . $name . " "; - $parameterdesc_start_lines{$name} = $new_start_line; - $new_start_line = 0; - } elsif ($name eq "@\.\.\.") { - $name = "..."; - $parameterdescs{$name} = $contents; - $sectcheck = $sectcheck . $name . " "; - $parameterdesc_start_lines{$name} = $new_start_line; - $new_start_line = 0; - } else { - if (defined($sections{$name}) && ($sections{$name} ne "")) { - # Only warn on user specified duplicate section names. - if ($name ne $section_default) { - emit_warning("${file}:$.", "duplicate section name '$name'\n"); - } - $sections{$name} .= $contents; - } else { - $sections{$name} = $contents; - push @sectionlist, $name; - $section_start_lines{$name} = $new_start_line; - $new_start_line = 0; - } - } -} - -## -# dump DOC: section after checking that it should go out -# -sub dump_doc_section { - my $file = shift; - my $name = shift; - my $contents = join "\n", @_; - - if ($no_doc_sections) { - return; - } - - return if (defined($nosymbol_table{$name})); - - if (($output_selection == OUTPUT_ALL) || - (($output_selection == OUTPUT_INCLUDE) && - defined($function_table{$name}))) - { - dump_section($file, $name, $contents); - output_blockhead({'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'module' => $modulename, - 'content-only' => ($output_selection != OUTPUT_ALL), }); - } -} - -## -# output function -# -# parameterdescs, a hash. -# function => "function name" -# parameterlist => @list of parameters -# parameterdescs => %parameter descriptions -# sectionlist => @list of sections -# sections => %section descriptions -# - -sub output_highlight { - my $contents = join "\n",@_; - my $line; - -# DEBUG -# if (!defined $contents) { -# use Carp; -# confess "output_highlight got called with no args?\n"; -# } - -# print STDERR "contents b4:$contents\n"; - eval $dohighlight; - die $@ if $@; -# print STDERR "contents af:$contents\n"; - - foreach $line (split "\n", $contents) { - if (! $output_preformatted) { - $line =~ s/^\s*//; - } - if ($line eq ""){ - if (! $output_preformatted) { - print $lineprefix, $blankline; - } - } else { - if ($output_mode eq "man" && substr($line, 0, 1) eq ".") { - print "\\&$line"; - } else { - print $lineprefix, $line; - } - } - print "\n"; - } -} - -## -# output function in man -sub output_function_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $count; - my $func_macro = $args{'func_macro'}; - my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty - - print ".TH \"$args{'function'}\" 9 \"$args{'function'}\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n"; - - print ".SH NAME\n"; - print $args{'function'} . " \\- " . $args{'purpose'} . "\n"; - - print ".SH SYNOPSIS\n"; - if ($args{'functiontype'} ne "") { - print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n"; - } else { - print ".B \"" . $args{'function'} . "\n"; - } - $count = 0; - my $parenth = "("; - my $post = ","; - foreach my $parameter (@{$args{'parameterlist'}}) { - if ($count == $#{$args{'parameterlist'}}) { - $post = ");"; - } - $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/$function_pointer/) { - # pointer-to-function - print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; - } else { - $type =~ s/([^\*])$/$1 /; - print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n"; - } - $count++; - $parenth = ""; - } - - $paramcount = $#{$args{'parameterlist'}}; # -1 is empty - if ($paramcount >= 0) { - print ".SH ARGUMENTS\n"; - } - foreach $parameter (@{$args{'parameterlist'}}) { - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - print ".IP \"" . $parameter . "\" 12\n"; - output_highlight($args{'parameterdescs'}{$parameter_name}); - } - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"", uc $section, "\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output enum in man -sub output_enum_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $count; - - print ".TH \"$args{'module'}\" 9 \"enum $args{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n"; - - print ".SH NAME\n"; - print "enum " . $args{'enum'} . " \\- " . $args{'purpose'} . "\n"; - - print ".SH SYNOPSIS\n"; - print "enum " . $args{'enum'} . " {\n"; - $count = 0; - foreach my $parameter (@{$args{'parameterlist'}}) { - print ".br\n.BI \" $parameter\"\n"; - if ($count == $#{$args{'parameterlist'}}) { - print "\n};\n"; - last; - } else { - print ", \n.br\n"; - } - $count++; - } - - print ".SH Constants\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - print ".IP \"" . $parameter . "\" 12\n"; - output_highlight($args{'parameterdescs'}{$parameter_name}); - } - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output struct in man -sub output_struct_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - - print ".TH \"$args{'module'}\" 9 \"" . $args{'type'} . " " . $args{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n"; - - print ".SH NAME\n"; - print $args{'type'} . " " . $args{'struct'} . " \\- " . $args{'purpose'} . "\n"; - - my $declaration = $args{'definition'}; - $declaration =~ s/\t/ /g; - $declaration =~ s/\n/"\n.br\n.BI \"/g; - print ".SH SYNOPSIS\n"; - print $args{'type'} . " " . $args{'struct'} . " {\n.br\n"; - print ".BI \"$declaration\n};\n.br\n\n"; - - print ".SH Members\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - ($parameter =~ /^#/) && next; - - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; - print ".IP \"" . $parameter . "\" 12\n"; - output_highlight($args{'parameterdescs'}{$parameter_name}); - } - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output typedef in man -sub output_typedef_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - - print ".TH \"$args{'module'}\" 9 \"$args{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n"; - - print ".SH NAME\n"; - print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n"; - - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -sub output_blockhead_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $count; - - print ".TH \"$args{'module'}\" 9 \"$args{'module'}\" \"$man_date\" \"API Manual\" LINUX\n"; - - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output in restructured text -# - -# -# This could use some work; it's used to output the DOC: sections, and -# starts by putting out the name of the doc section itself, but that tends -# to duplicate a header already in the template file. -# -sub output_blockhead_rst(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - - foreach $section (@{$args{'sectionlist'}}) { - next if (defined($nosymbol_table{$section})); - - if ($output_selection != OUTPUT_INCLUDE) { - print ".. _$section:\n\n"; - print "**$section**\n\n"; - } - print_lineno($section_start_lines{$section}); - output_highlight_rst($args{'sections'}{$section}); - print "\n"; - } -} - -# -# Apply the RST highlights to a sub-block of text. -# -sub highlight_block($) { - # The dohighlight kludge requires the text be called $contents - my $contents = shift; - eval $dohighlight; - die $@ if $@; - return $contents; -} - -# -# Regexes used only here. -# -my $sphinx_literal = '^[^.].*::$'; -my $sphinx_cblock = '^\.\.\ +code-block::'; - -sub output_highlight_rst { - my $input = join "\n",@_; - my $output = ""; - my $line; - my $in_literal = 0; - my $litprefix; - my $block = ""; - - foreach $line (split "\n",$input) { - # - # If we're in a literal block, see if we should drop out - # of it. Otherwise pass the line straight through unmunged. - # - if ($in_literal) { - if (! ($line =~ /^\s*$/)) { - # - # If this is the first non-blank line in a literal - # block we need to figure out what the proper indent is. - # - if ($litprefix eq "") { - $line =~ /^(\s*)/; - $litprefix = '^' . $1; - $output .= $line . "\n"; - } elsif (! ($line =~ /$litprefix/)) { - $in_literal = 0; - } else { - $output .= $line . "\n"; - } - } else { - $output .= $line . "\n"; - } - } - # - # Not in a literal block (or just dropped out) - # - if (! $in_literal) { - $block .= $line . "\n"; - if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) { - $in_literal = 1; - $litprefix = ""; - $output .= highlight_block($block); - $block = "" - } - } - } - - if ($block) { - $output .= highlight_block($block); - } - - $output =~ s/^\n+//g; - $output =~ s/\n+$//g; - - foreach $line (split "\n", $output) { - print $lineprefix . $line . "\n"; - } -} - -sub output_function_rst(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $oldprefix = $lineprefix; - - my $signature = ""; - my $func_macro = $args{'func_macro'}; - my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty - - if ($func_macro) { - $signature = $args{'function'}; - } else { - if ($args{'functiontype'}) { - $signature = $args{'functiontype'} . " "; - } - $signature .= $args{'function'} . " ("; - } - - my $count = 0; - foreach my $parameter (@{$args{'parameterlist'}}) { - if ($count ne 0) { - $signature .= ", "; - } - $count++; - $type = $args{'parametertypes'}{$parameter}; - - if ($type =~ m/$function_pointer/) { - # pointer-to-function - $signature .= $1 . $parameter . ") (" . $2 . ")"; - } else { - $signature .= $type; - } - } - - if (!$func_macro) { - $signature .= ")"; - } - - if ($args{'typedef'} || $args{'functiontype'} eq "") { - print ".. c:macro:: ". $args{'function'} . "\n\n"; - - if ($args{'typedef'}) { - print_lineno($declaration_start_line); - print " **Typedef**: "; - $lineprefix = ""; - output_highlight_rst($args{'purpose'}); - print "\n\n**Syntax**\n\n"; - print " ``$signature``\n\n"; - } else { - print "``$signature``\n\n"; - } - } else { - print ".. c:function:: $signature\n\n"; - } - - if (!$args{'typedef'}) { - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - } - - # - # Put our descriptive text into a container (thus an HTML
) to help - # set the function prototypes apart. - # - $lineprefix = " "; - if ($paramcount >= 0) { - print ".. container:: kernelindent\n\n"; - print $lineprefix . "**Parameters**\n\n"; - } - foreach $parameter (@{$args{'parameterlist'}}) { - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - $type = $args{'parametertypes'}{$parameter}; - - if ($type ne "") { - print $lineprefix . "``$type``\n"; - } else { - print $lineprefix . "``$parameter``\n"; - } - - print_lineno($parameterdesc_start_lines{$parameter_name}); - - $lineprefix = " "; - if (defined($args{'parameterdescs'}{$parameter_name}) && - $args{'parameterdescs'}{$parameter_name} ne $undescribed) { - output_highlight_rst($args{'parameterdescs'}{$parameter_name}); - } else { - print $lineprefix . "*undescribed*\n"; - } - $lineprefix = " "; - print "\n"; - } - - output_section_rst(@_); - $lineprefix = $oldprefix; -} - -sub output_section_rst(%) { - my %args = %{$_[0]}; - my $section; - my $oldprefix = $lineprefix; - - foreach $section (@{$args{'sectionlist'}}) { - print $lineprefix . "**$section**\n\n"; - print_lineno($section_start_lines{$section}); - output_highlight_rst($args{'sections'}{$section}); - print "\n"; - } - print "\n"; -} - -sub output_enum_rst(%) { - my %args = %{$_[0]}; - my ($parameter); - my $oldprefix = $lineprefix; - my $count; - my $outer; - - my $name = $args{'enum'}; - print "\n\n.. c:enum:: " . $name . "\n\n"; - - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - - print ".. container:: kernelindent\n\n"; - $outer = $lineprefix . " "; - $lineprefix = $outer . " "; - print $outer . "**Constants**\n\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - print $outer . "``$parameter``\n"; - - if ($args{'parameterdescs'}{$parameter} ne $undescribed) { - output_highlight_rst($args{'parameterdescs'}{$parameter}); - } else { - print $lineprefix . "*undescribed*\n"; - } - print "\n"; - } - print "\n"; - $lineprefix = $oldprefix; - output_section_rst(@_); -} - -sub output_typedef_rst(%) { - my %args = %{$_[0]}; - my ($parameter); - my $oldprefix = $lineprefix; - my $name; - - $name = $args{'typedef'}; - - print "\n\n.. c:type:: " . $name . "\n\n"; - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - - $lineprefix = $oldprefix; - output_section_rst(@_); -} - -sub output_struct_rst(%) { - my %args = %{$_[0]}; - my ($parameter); - my $oldprefix = $lineprefix; - - my $name = $args{'struct'}; - if ($args{'type'} eq 'union') { - print "\n\n.. c:union:: " . $name . "\n\n"; - } else { - print "\n\n.. c:struct:: " . $name . "\n\n"; - } - - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - - print ".. container:: kernelindent\n\n"; - print $lineprefix . "**Definition**::\n\n"; - my $declaration = $args{'definition'}; - $lineprefix = $lineprefix . " "; - $declaration =~ s/\t/$lineprefix/g; - print $lineprefix . $args{'type'} . " " . $args{'struct'} . " {\n$declaration" . $lineprefix . "};\n\n"; - - $lineprefix = " "; - print $lineprefix . "**Members**\n\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - ($parameter =~ /^#/) && next; - - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; - $type = $args{'parametertypes'}{$parameter}; - print_lineno($parameterdesc_start_lines{$parameter_name}); - print $lineprefix . "``" . $parameter . "``\n"; - $lineprefix = " "; - output_highlight_rst($args{'parameterdescs'}{$parameter_name}); - $lineprefix = " "; - print "\n"; - } - print "\n"; - - $lineprefix = $oldprefix; - output_section_rst(@_); -} - -## none mode output functions - -sub output_function_none(%) { -} - -sub output_enum_none(%) { -} - -sub output_typedef_none(%) { -} - -sub output_struct_none(%) { -} - -sub output_blockhead_none(%) { -} - -## -# generic output function for all types (function, struct/union, typedef, enum); -# calls the generated, variable output_ function name based on -# functype and output_mode -sub output_declaration { - no strict 'refs'; - my $name = shift; - my $functype = shift; - my $func = "output_${functype}_$output_mode"; - - return if (defined($nosymbol_table{$name})); - - if (($output_selection == OUTPUT_ALL) || - (($output_selection == OUTPUT_INCLUDE || - $output_selection == OUTPUT_EXPORTED) && - defined($function_table{$name})) || - ($output_selection == OUTPUT_INTERNAL && - !($functype eq "function" && defined($function_table{$name})))) - { - &$func(@_); - $section_counter++; - } -} - -## -# generic output function - calls the right one based on current output mode. -sub output_blockhead { - no strict 'refs'; - my $func = "output_blockhead_" . $output_mode; - &$func(@_); - $section_counter++; -} - -## -# takes a declaration (struct, union, enum, typedef) and -# invokes the right handler. NOT called for functions. -sub dump_declaration($$) { - no strict 'refs'; - my ($prototype, $file) = @_; - my $func = "dump_" . $decl_type; - &$func(@_); -} - -sub dump_union($$) { - dump_struct(@_); -} - -sub dump_struct($$) { - my $x = shift; - my $file = shift; - my $decl_type; - my $members; - my $type = qr{struct|union}; - # For capturing struct/union definition body, i.e. "{members*}qualifiers*" - my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; - my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; - my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; - - if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { - $decl_type = $1; - $declaration_name = $2; - $members = $3; - } elsif ($x =~ /typedef\s+($type)\s*$definition_body\s*(\w+)\s*;/) { - $decl_type = $1; - $declaration_name = $3; - $members = $2; - } - - if ($members) { - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for $decl_type $identifier. Prototype was for $decl_type $declaration_name instead\n"); - return; - } - - # ignore members marked private: - $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; - $members =~ s/\/\*\s*private:.*//gosi; - # strip comments: - $members =~ s/\/\*.*?\*\///gos; - # strip attributes - $members =~ s/\s*$attribute/ /gi; - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; - $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos; - $members =~ s/\s*__counted_by_(le|be)\s*\([^;]*\)/ /gos; - $members =~ s/\s*__packed\s*/ /gos; - $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; - $members =~ s/\s*____cacheline_aligned/ /gos; - # unwrap struct_group(): - # - first eat non-declaration parameters and rewrite for final match - # - then remove macro, outer parens, and trailing semicolon - $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos; - $members =~ s/\bstruct_group_attr\s*\(([^,]*,){2}/STRUCT_GROUP(/gos; - $members =~ s/\bstruct_group_tagged\s*\(([^,]*),([^,]*),/struct $1 $2; STRUCT_GROUP(/gos; - $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos; - $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos; - - my $args = qr{([^,)]+)}; - # replace DECLARE_BITMAP - $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, PHY_INTERFACE_MODE_MAX)/gos; - $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; - # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; - # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; - # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; - # replace DECLARE_FLEX_ARRAY - $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos; - #replace DEFINE_DMA_UNMAP_ADDR - $members =~ s/DEFINE_DMA_UNMAP_ADDR\s*\($args\)/dma_addr_t $1/gos; - #replace DEFINE_DMA_UNMAP_LEN - $members =~ s/DEFINE_DMA_UNMAP_LEN\s*\($args\)/__u32 $1/gos; - my $declaration = $members; - - # Split nested struct/union elements as newer ones - while ($members =~ m/$struct_members/) { - my $newmember; - my $maintype = $1; - my $ids = $4; - my $content = $3; - foreach my $id(split /,/, $ids) { - $newmember .= "$maintype $id; "; - - $id =~ s/[:\[].*//; - $id =~ s/^\s*\**(\S+)\s*/$1/; - foreach my $arg (split /;/, $content) { - next if ($arg =~ m/^\s*$/); - if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) { - # pointer-to-function - my $type = $1; - my $name = $2; - my $extra = $3; - next if (!$name); - if ($id =~ m/^\s*$/) { - # anonymous struct/union - $newmember .= "$type$name$extra; "; - } else { - $newmember .= "$type$id.$name$extra; "; - } - } else { - my $type; - my $names; - $arg =~ s/^\s+//; - $arg =~ s/\s+$//; - # Handle bitmaps - $arg =~ s/:\s*\d+\s*//g; - # Handle arrays - $arg =~ s/\[.*\]//g; - # The type may have multiple words, - # and multiple IDs can be defined, like: - # const struct foo, *bar, foobar - # So, we remove spaces when parsing the - # names, in order to match just names - # and commas for the names - $arg =~ s/\s*,\s*/,/g; - if ($arg =~ m/(.*)\s+([\S+,]+)/) { - $type = $1; - $names = $2; - } else { - $newmember .= "$arg; "; - next; - } - foreach my $name (split /,/, $names) { - $name =~ s/^\s*\**(\S+)\s*/$1/; - next if (($name =~ m/^\s*$/)); - if ($id =~ m/^\s*$/) { - # anonymous struct/union - $newmember .= "$type $name; "; - } else { - $newmember .= "$type $id.$name; "; - } - } - } - } - } - $members =~ s/$struct_members/$newmember/; - } - - # Ignore other nested elements, like enums - $members =~ s/(\{[^\{\}]*\})//g; - - create_parameterlist($members, ';', $file, $declaration_name); - check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual); - - # Adjust declaration for better display - $declaration =~ s/([\{;])/$1\n/g; - $declaration =~ s/\}\s+;/};/g; - # Better handle inlined enums - do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/); - - my @def_args = split /\n/, $declaration; - my $level = 1; - $declaration = ""; - foreach my $clause (@def_args) { - $clause =~ s/^\s+//; - $clause =~ s/\s+$//; - $clause =~ s/\s+/ /; - next if (!$clause); - $level-- if ($clause =~ m/(\})/ && $level > 1); - if (!($clause =~ m/^\s*#/)) { - $declaration .= "\t" x $level; - } - $declaration .= "\t" . $clause . "\n"; - $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/)); - } - output_declaration($declaration_name, - 'struct', - {'struct' => $declaration_name, - 'module' => $modulename, - 'definition' => $declaration, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose, - 'type' => $decl_type - }); - } else { - print STDERR "${file}:$.: error: Cannot parse struct or union!\n"; - ++$errors; - } -} - - -sub show_warnings($$) { - my $functype = shift; - my $name = shift; - - return 0 if (defined($nosymbol_table{$name})); - - return 1 if ($output_selection == OUTPUT_ALL); - - if ($output_selection == OUTPUT_EXPORTED) { - if (defined($function_table{$name})) { - return 1; - } else { - return 0; - } - } - if ($output_selection == OUTPUT_INTERNAL) { - if (!($functype eq "function" && defined($function_table{$name}))) { - return 1; - } else { - return 0; - } - } - if ($output_selection == OUTPUT_INCLUDE) { - if (defined($function_table{$name})) { - return 1; - } else { - return 0; - } - } - die("Please add the new output type at show_warnings()"); -} - -sub dump_enum($$) { - my $x = shift; - my $file = shift; - my $members; - - # ignore members marked private: - $x =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; - $x =~ s/\/\*\s*private:.*}/}/gosi; - - $x =~ s@/\*.*?\*/@@gos; # strip comments. - # strip #define macros inside enums - $x =~ s@#\s*((define|ifdef|if)\s+|endif)[^;]*;@@gos; - - if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) { - $declaration_name = $2; - $members = $1; - } elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) { - $declaration_name = $1; - $members = $2; - } - - if ($members) { - if ($identifier ne $declaration_name) { - if ($identifier eq "") { - emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n"); - } else { - emit_warning("${file}:$.", "expecting prototype for enum $identifier. Prototype was for enum $declaration_name instead\n"); - } - return; - } - $declaration_name = "(anonymous)" if ($declaration_name eq ""); - - my %_members; - - $members =~ s/\s+$//; - $members =~ s/\([^;]*?[\)]//g; - - foreach my $arg (split ',', $members) { - $arg =~ s/^\s*(\w+).*/$1/; - push @parameterlist, $arg; - if (!$parameterdescs{$arg}) { - $parameterdescs{$arg} = $undescribed; - if (show_warnings("enum", $declaration_name)) { - emit_warning("${file}:$.", "Enum value '$arg' not described in enum '$declaration_name'\n"); - } - } - $_members{$arg} = 1; - } - - while (my ($k, $v) = each %parameterdescs) { - if (!exists($_members{$k})) { - if (show_warnings("enum", $declaration_name)) { - emit_warning("${file}:$.", "Excess enum value '$k' description in '$declaration_name'\n"); - } - } - } - - output_declaration($declaration_name, - 'enum', - {'enum' => $declaration_name, - 'module' => $modulename, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose - }); - } else { - print STDERR "${file}:$.: error: Cannot parse enum!\n"; - ++$errors; - } -} - -my $typedef_type = qr { ((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s* }x; -my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x; -my $typedef_args = qr { \s*\((.*)\); }x; - -my $typedef1 = qr { typedef$typedef_type\($typedef_ident\)$typedef_args }x; -my $typedef2 = qr { typedef$typedef_type$typedef_ident$typedef_args }x; - -sub dump_typedef($$) { - my $x = shift; - my $file = shift; - - $x =~ s@/\*.*?\*/@@gos; # strip comments. - - # Parse function typedef prototypes - if ($x =~ $typedef1 || $x =~ $typedef2) { - $return_type = $1; - $declaration_name = $2; - my $args = $3; - $return_type =~ s/^\s+//; - - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"); - return; - } - - create_parameterlist($args, ',', $file, $declaration_name); - - output_declaration($declaration_name, - 'function', - {'function' => $declaration_name, - 'typedef' => 1, - 'module' => $modulename, - 'functiontype' => $return_type, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose - }); - return; - } - - while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) { - $x =~ s/\(*.\)\s*;$/;/; - $x =~ s/\[*.\]\s*;$/;/; - } - - if ($x =~ /typedef.*\s+(\w+)\s*;/) { - $declaration_name = $1; - - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"); - return; - } - - output_declaration($declaration_name, - 'typedef', - {'typedef' => $declaration_name, - 'module' => $modulename, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose - }); - } else { - print STDERR "${file}:$.: error: Cannot parse typedef!\n"; - ++$errors; - } -} - -sub save_struct_actual($) { - my $actual = shift; - - # strip all spaces from the actual param so that it looks like one string item - $actual =~ s/\s*//g; - $struct_actual = $struct_actual . $actual . " "; -} - -sub create_parameterlist($$$$) { - my $args = shift; - my $splitter = shift; - my $file = shift; - my $declaration_name = shift; - my $type; - my $param; - - # temporarily replace commas inside function pointer definition - my $arg_expr = qr{\([^\),]+}; - while ($args =~ /$arg_expr,/) { - $args =~ s/($arg_expr),/$1#/g; - } - - foreach my $arg (split($splitter, $args)) { - # strip comments - $arg =~ s/\/\*.*\*\///; - # ignore argument attributes - $arg =~ s/\sPOS0?\s/ /; - # strip leading/trailing spaces - $arg =~ s/^\s*//; - $arg =~ s/\s*$//; - $arg =~ s/\s+/ /; - - if ($arg =~ /^#/) { - # Treat preprocessor directive as a typeless variable just to fill - # corresponding data structures "correctly". Catch it later in - # output_* subs. - push_parameter($arg, "", "", $file); - } elsif ($arg =~ m/\(.+\)\s*\(/) { - # pointer-to-function - $arg =~ tr/#/,/; - $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/; - $param = $1; - $type = $arg; - $type =~ s/([^\(]+\(\*?)\s*$param/$1/; - save_struct_actual($param); - push_parameter($param, $type, $arg, $file, $declaration_name); - } elsif ($arg =~ m/\(.+\)\s*\[/) { - # array-of-pointers - $arg =~ tr/#/,/; - $arg =~ m/[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)/; - $param = $1; - $type = $arg; - $type =~ s/([^\(]+\(\*?)\s*$param/$1/; - save_struct_actual($param); - push_parameter($param, $type, $arg, $file, $declaration_name); - } elsif ($arg) { - $arg =~ s/\s*:\s*/:/g; - $arg =~ s/\s*\[/\[/g; - - my @args = split('\s*,\s*', $arg); - if ($args[0] =~ m/\*/) { - $args[0] =~ s/(\*+)\s*/ $1/; - } - - my @first_arg; - if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) { - shift @args; - push(@first_arg, split('\s+', $1)); - push(@first_arg, $2); - } else { - @first_arg = split('\s+', shift @args); - } - - unshift(@args, pop @first_arg); - $type = join " ", @first_arg; - - foreach $param (@args) { - if ($param =~ m/^(\*+)\s*(.*)/) { - save_struct_actual($2); - - push_parameter($2, "$type $1", $arg, $file, $declaration_name); - } elsif ($param =~ m/(.*?):(\w+)/) { - if ($type ne "") { # skip unnamed bit-fields - save_struct_actual($1); - push_parameter($1, "$type:$2", $arg, $file, $declaration_name) - } - } else { - save_struct_actual($param); - push_parameter($param, $type, $arg, $file, $declaration_name); - } - } - } - } -} - -sub push_parameter($$$$$) { - my $param = shift; - my $type = shift; - my $org_arg = shift; - my $file = shift; - my $declaration_name = shift; - - if (($anon_struct_union == 1) && ($type eq "") && - ($param eq "}")) { - return; # ignore the ending }; from anon. struct/union - } - - $anon_struct_union = 0; - $param =~ s/[\[\)].*//; - - if ($type eq "" && $param =~ /\.\.\.$/) - { - if (!$param =~ /\w\.\.\.$/) { - # handles unnamed variable parameters - $param = "..."; - } elsif ($param =~ /\w\.\.\.$/) { - # for named variable parameters of the form `x...`, remove the dots - $param =~ s/\.\.\.$//; - } - if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") { - $parameterdescs{$param} = "variable arguments"; - } - } - elsif ($type eq "" && ($param eq "" or $param eq "void")) - { - $param="void"; - $parameterdescs{void} = "no arguments"; - } - elsif ($type eq "" && ($param eq "struct" or $param eq "union")) - # handle unnamed (anonymous) union or struct: - { - $type = $param; - $param = "{unnamed_" . $param . "}"; - $parameterdescs{$param} = "anonymous\n"; - $anon_struct_union = 1; - } - elsif ($param =~ "__cacheline_group" ) - # handle cache group enforcing variables: they do not need be described in header files - { - return; # ignore __cacheline_group_begin and __cacheline_group_end - } - - # warn if parameter has no description - # (but ignore ones starting with # as these are not parameters - # but inline preprocessor statements); - # Note: It will also ignore void params and unnamed structs/unions - if (!defined $parameterdescs{$param} && $param !~ /^#/) { - $parameterdescs{$param} = $undescribed; - - if (show_warnings($type, $declaration_name) && $param !~ /\./) { - emit_warning("${file}:$.", "Function parameter or struct member '$param' not described in '$declaration_name'\n"); - } - } - - # strip spaces from $param so that it is one continuous string - # on @parameterlist; - # this fixes a problem where check_sections() cannot find - # a parameter like "addr[6 + 2]" because it actually appears - # as "addr[6", "+", "2]" on the parameter list; - # but it's better to maintain the param string unchanged for output, - # so just weaken the string compare in check_sections() to ignore - # "[blah" in a parameter string; - ###$param =~ s/\s*//g; - push @parameterlist, $param; - $org_arg =~ s/\s\s+/ /g; - $parametertypes{$param} = $org_arg; -} - -sub check_sections($$$$$) { - my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_; - my @sects = split ' ', $sectcheck; - my @prms = split ' ', $prmscheck; - my $err; - my ($px, $sx); - my $prm_clean; # strip trailing "[array size]" and/or beginning "*" - - foreach $sx (0 .. $#sects) { - $err = 1; - foreach $px (0 .. $#prms) { - $prm_clean = $prms[$px]; - $prm_clean =~ s/\[.*\]//; - $prm_clean =~ s/$attribute//i; - # ignore array size in a parameter string; - # however, the original param string may contain - # spaces, e.g.: addr[6 + 2] - # and this appears in @prms as "addr[6" since the - # parameter list is split at spaces; - # hence just ignore "[..." for the sections check; - $prm_clean =~ s/\[.*//; - - ##$prm_clean =~ s/^\**//; - if ($prm_clean eq $sects[$sx]) { - $err = 0; - last; - } - } - if ($err) { - if ($decl_type eq "function") { - emit_warning("${file}:$.", - "Excess function parameter " . - "'$sects[$sx]' " . - "description in '$decl_name'\n"); - } elsif (($decl_type eq "struct") or - ($decl_type eq "union")) { - emit_warning("${file}:$.", - "Excess $decl_type member " . - "'$sects[$sx]' " . - "description in '$decl_name'\n"); - } - } - } -} - -## -# Checks the section describing the return value of a function. -sub check_return_section { - my $file = shift; - my $declaration_name = shift; - my $return_type = shift; - - # Ignore an empty return type (It's a macro) - # Ignore functions with a "void" return type. (But don't ignore "void *") - if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) { - return; - } - - if (!defined($sections{$section_return}) || - $sections{$section_return} eq "") - { - emit_warning("${file}:$.", - "No description found for return value of " . - "'$declaration_name'\n"); - } -} - -## -# takes a function prototype and the name of the current file being -# processed and spits out all the details stored in the global -# arrays/hashes. -sub dump_function($$) { - my $prototype = shift; - my $file = shift; - my $func_macro = 0; - - print_lineno($new_start_line); - - $prototype =~ s/^static +//; - $prototype =~ s/^extern +//; - $prototype =~ s/^asmlinkage +//; - $prototype =~ s/^inline +//; - $prototype =~ s/^__inline__ +//; - $prototype =~ s/^__inline +//; - $prototype =~ s/^__always_inline +//; - $prototype =~ s/^noinline +//; - $prototype =~ s/^__FORTIFY_INLINE +//; - $prototype =~ s/__init +//; - $prototype =~ s/__init_or_module +//; - $prototype =~ s/__deprecated +//; - $prototype =~ s/__flatten +//; - $prototype =~ s/__meminit +//; - $prototype =~ s/__must_check +//; - $prototype =~ s/__weak +//; - $prototype =~ s/__sched +//; - $prototype =~ s/_noprof//; - $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; - $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; - $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//; - $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/; - my $define = $prototype =~ s/^#\s*define\s+//; #ak added - $prototype =~ s/__attribute_const__ +//; - $prototype =~ s/__attribute__\s*\(\( - (?: - [\w\s]++ # attribute name - (?:\([^)]*+\))? # attribute arguments - \s*+,? # optional comma at the end - )+ - \)\)\s+//x; - - # Yes, this truly is vile. We are looking for: - # 1. Return type (may be nothing if we're looking at a macro) - # 2. Function name - # 3. Function parameters. - # - # All the while we have to watch out for function pointer parameters - # (which IIRC is what the two sections are for), C types (these - # regexps don't even start to express all the possibilities), and - # so on. - # - # If you mess with these regexps, it's a good idea to check that - # the following functions' documentation still comes out right: - # - parport_register_device (function pointer parameters) - # - atomic_set (macro) - # - pci_match_device, __copy_to_user (long return type) - my $name = qr{[a-zA-Z0-9_~:]+}; - my $prototype_end1 = qr{[^\(]*}; - my $prototype_end2 = qr{[^\{]*}; - my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; - my $type1 = qr{[\w\s]+}; - my $type2 = qr{$type1\*+}; - - if ($define && $prototype =~ m/^()($name)\s+/) { - # This is an object-like macro, it has no return type and no parameter - # list. - # Function-like macros are not allowed to have spaces between - # declaration_name and opening parenthesis (notice the \s+). - $return_type = $1; - $declaration_name = $2; - $func_macro = 1; - } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || - $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || - $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) { - $return_type = $1; - $declaration_name = $2; - my $args = $3; - - create_parameterlist($args, ',', $file, $declaration_name); - } else { - emit_warning("${file}:$.", "cannot understand function prototype: '$prototype'\n"); - return; - } - - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for $identifier(). Prototype was for $declaration_name() instead\n"); - return; - } - - my $prms = join " ", @parameterlist; - check_sections($file, $declaration_name, "function", $sectcheck, $prms); - - # This check emits a lot of warnings at the moment, because many - # functions don't have a 'Return' doc section. So until the number - # of warnings goes sufficiently down, the check is only performed in - # -Wreturn mode. - # TODO: always perform the check. - if ($Wreturn && !$func_macro) { - check_return_section($file, $declaration_name, $return_type); - } - - # The function parser can be called with a typedef parameter. - # Handle it. - if ($return_type =~ /typedef/) { - output_declaration($declaration_name, - 'function', - {'function' => $declaration_name, - 'typedef' => 1, - 'module' => $modulename, - 'functiontype' => $return_type, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose, - 'func_macro' => $func_macro - }); - } else { - output_declaration($declaration_name, - 'function', - {'function' => $declaration_name, - 'module' => $modulename, - 'functiontype' => $return_type, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose, - 'func_macro' => $func_macro - }); - } -} - -sub reset_state { - $function = ""; - %parameterdescs = (); - %parametertypes = (); - @parameterlist = (); - %sections = (); - @sectionlist = (); - $sectcheck = ""; - $struct_actual = ""; - $prototype = ""; - - $state = STATE_NORMAL; - $inline_doc_state = STATE_INLINE_NA; -} - -sub tracepoint_munge($) { - my $file = shift; - my $tracepointname = 0; - my $tracepointargs = 0; - - if ($prototype =~ m/TRACE_EVENT\((.*?),/) { - $tracepointname = $1; - } - if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) { - $tracepointname = $1; - } - if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) { - $tracepointname = $2; - } - $tracepointname =~ s/^\s+//; #strip leading whitespace - if ($prototype =~ m/TP_PROTO\((.*?)\)/) { - $tracepointargs = $1; - } - if (($tracepointname eq 0) || ($tracepointargs eq 0)) { - emit_warning("${file}:$.", "Unrecognized tracepoint format: \n". - "$prototype\n"); - } else { - $prototype = "static inline void trace_$tracepointname($tracepointargs)"; - $identifier = "trace_$identifier"; - } -} - -sub syscall_munge() { - my $void = 0; - - $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's -## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) { - if ($prototype =~ m/SYSCALL_DEFINE0/) { - $void = 1; -## $prototype = "long sys_$1(void)"; - } - - $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name - if ($prototype =~ m/long (sys_.*?),/) { - $prototype =~ s/,/\(/; - } elsif ($void) { - $prototype =~ s/\)/\(void\)/; - } - - # now delete all of the odd-number commas in $prototype - # so that arg types & arg names don't have a comma between them - my $count = 0; - my $len = length($prototype); - if ($void) { - $len = 0; # skip the for-loop - } - for (my $ix = 0; $ix < $len; $ix++) { - if (substr($prototype, $ix, 1) eq ',') { - $count++; - if ($count % 2 == 1) { - substr($prototype, $ix, 1) = ' '; - } - } - } -} - -sub process_proto_function($$) { - my $x = shift; - my $file = shift; - - $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line - - if ($x =~ /^#/ && $x !~ /^#\s*define/) { - # do nothing - } elsif ($x =~ /([^\{]*)/) { - $prototype .= $1; - } - - if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) { - $prototype =~ s@/\*.*?\*/@@gos; # strip comments. - $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's. - $prototype =~ s@^\s+@@gos; # strip leading spaces - - # Handle prototypes for function pointers like: - # int (*pcs_config)(struct foo) - $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos; - - if ($prototype =~ /SYSCALL_DEFINE/) { - syscall_munge(); - } - if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ || - $prototype =~ /DEFINE_SINGLE_EVENT/) - { - tracepoint_munge($file); - } - dump_function($prototype, $file); - reset_state(); - } -} - -sub process_proto_type($$) { - my $x = shift; - my $file = shift; - - $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's. - $x =~ s@^\s+@@gos; # strip leading spaces - $x =~ s@\s+$@@gos; # strip trailing spaces - $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line - - if ($x =~ /^#/) { - # To distinguish preprocessor directive from regular declaration later. - $x .= ";"; - } - - while (1) { - if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) { - if( length $prototype ) { - $prototype .= " " - } - $prototype .= $1 . $2; - ($2 eq '{') && $brcount++; - ($2 eq '}') && $brcount--; - if (($2 eq ';') && ($brcount == 0)) { - dump_declaration($prototype, $file); - reset_state(); - last; - } - $x = $3; - } else { - $prototype .= $x; - last; - } - } -} - - -sub map_filename($) { - my $file; - my ($orig_file) = @_; - - if (defined($ENV{'SRCTREE'})) { - $file = "$ENV{'SRCTREE'}" . "/" . $orig_file; - } else { - $file = $orig_file; - } - - return $file; -} - -sub process_export_file($) { - my ($orig_file) = @_; - my $file = map_filename($orig_file); - - if (!open(IN,"<$file")) { - print STDERR "Error: Cannot open file $file\n"; - ++$errors; - return; - } - - while () { - if (/$export_symbol/) { - next if (defined($nosymbol_table{$2})); - $function_table{$2} = 1; - } - if (/$export_symbol_ns/) { - next if (defined($nosymbol_table{$2})); - $function_table{$2} = 1; - } - } - - close(IN); -} - -# -# Parsers for the various processing states. -# -# STATE_NORMAL: looking for the /** to begin everything. -# -sub process_normal() { - if (/$doc_start/o) { - $state = STATE_NAME; # next line is always the function name - $declaration_start_line = $. + 1; - } -} - -# -# STATE_NAME: Looking for the "name - description" line -# -sub process_name($$) { - my $file = shift; - my $descr; - - if (/$doc_block/o) { - $state = STATE_DOCBLOCK; - $contents = ""; - $new_start_line = $.; - - if ( $1 eq "" ) { - $section = $section_intro; - } else { - $section = $1; - } - } elsif (/$doc_decl/o) { - $identifier = $1; - my $is_kernel_comment = 0; - my $decl_start = qr{$doc_com}; - # test for pointer declaration type, foo * bar() - desc - my $fn_type = qr{\w+\s*\*\s*}; - my $parenthesis = qr{\(\w*\)}; - my $decl_end = qr{[-:].*}; - if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { - $identifier = $1; - } - if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { - $decl_type = $1; - $identifier = $2; - $is_kernel_comment = 1; - } - # Look for foo() or static void foo() - description; or misspelt - # identifier - elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start$fn_type?(\w+[^-:]*)$parenthesis?\s*$decl_end$/) { - $identifier = $1; - $decl_type = 'function'; - $identifier =~ s/^define\s+//; - $is_kernel_comment = 1; - } - $identifier =~ s/\s+$//; - - $state = STATE_BODY; - # if there's no @param blocks need to set up default section - # here - $contents = ""; - $section = $section_default; - $new_start_line = $. + 1; - if (/[-:](.*)/) { - # strip leading/trailing/multiple spaces - $descr= $1; - $descr =~ s/^\s*//; - $descr =~ s/\s*$//; - $descr =~ s/\s+/ /g; - $declaration_purpose = $descr; - $state = STATE_BODY_MAYBE; - } else { - $declaration_purpose = ""; - } - - if (!$is_kernel_comment) { - emit_warning("${file}:$.", "This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n$_"); - $state = STATE_NORMAL; - } - - if (($declaration_purpose eq "") && $Wshort_desc) { - emit_warning("${file}:$.", "missing initial short description on line:\n$_"); - } - - if ($identifier eq "" && $decl_type ne "enum") { - emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n$_"); - $state = STATE_NORMAL; - } - - if ($verbose) { - print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n"; - } - } else { - emit_warning("${file}:$.", "Cannot understand $_ on line $. - I thought it was a doc line\n"); - $state = STATE_NORMAL; - } -} - - -# -# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. -# -sub process_body($$) { - my $file = shift; - - if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) { - dump_section($file, $section, $contents); - $section = $section_default; - $new_start_line = $.; - $contents = ""; - } - - if (/$doc_sect/i) { # case insensitive for supported section names - $newsection = $1; - $newcontents = $2; - - # map the supported section names to the canonical names - if ($newsection =~ m/^description$/i) { - $newsection = $section_default; - } elsif ($newsection =~ m/^context$/i) { - $newsection = $section_context; - } elsif ($newsection =~ m/^returns?$/i) { - $newsection = $section_return; - } elsif ($newsection =~ m/^\@return$/) { - # special: @return is a section, not a param description - $newsection = $section_return; - } - - if (($contents ne "") && ($contents ne "\n")) { - dump_section($file, $section, $contents); - $section = $section_default; - } - - $state = STATE_BODY; - $contents = $newcontents; - $new_start_line = $.; - while (substr($contents, 0, 1) eq " ") { - $contents = substr($contents, 1); - } - if ($contents ne "") { - $contents .= "\n"; - } - $section = $newsection; - $leading_space = undef; - } elsif (/$doc_end/) { - if (($contents ne "") && ($contents ne "\n")) { - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - } - # look for doc_com + + doc_end: - if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') { - emit_warning("${file}:$.", "suspicious ending line: $_"); - } - - $prototype = ""; - $state = STATE_PROTO; - $brcount = 0; - $new_start_line = $. + 1; - } elsif (/$doc_content/) { - if ($1 eq "") { - if ($section eq $section_context) { - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - $new_start_line = $.; - $state = STATE_BODY; - } else { - if ($section ne $section_default) { - $state = STATE_BODY_WITH_BLANK_LINE; - } else { - $state = STATE_BODY; - } - $contents .= "\n"; - } - } elsif ($state == STATE_BODY_MAYBE) { - # Continued declaration purpose - chomp($declaration_purpose); - $declaration_purpose .= " " . $1; - $declaration_purpose =~ s/\s+/ /g; - } else { - my $cont = $1; - if ($section =~ m/^@/ || $section eq $section_context) { - if (!defined $leading_space) { - if ($cont =~ m/^(\s+)/) { - $leading_space = $1; - } else { - $leading_space = ""; - } - } - $cont =~ s/^$leading_space//; - } - $contents .= $cont . "\n"; - } - } else { - # i dont know - bad line? ignore. - emit_warning("${file}:$.", "bad line: $_"); - } -} - - -# -# STATE_PROTO: reading a function/whatever prototype. -# -sub process_proto($$) { - my $file = shift; - - if (/$doc_inline_oneline/) { - $section = $1; - $contents = $2; - if ($contents ne "") { - $contents .= "\n"; - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - } - } elsif (/$doc_inline_start/) { - $state = STATE_INLINE; - $inline_doc_state = STATE_INLINE_NAME; - } elsif ($decl_type eq 'function') { - process_proto_function($_, $file); - } else { - process_proto_type($_, $file); - } -} - -# -# STATE_DOCBLOCK: within a DOC: block. -# -sub process_docblock($$) { - my $file = shift; - - if (/$doc_end/) { - dump_doc_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - $function = ""; - %parameterdescs = (); - %parametertypes = (); - @parameterlist = (); - %sections = (); - @sectionlist = (); - $prototype = ""; - $state = STATE_NORMAL; - } elsif (/$doc_content/) { - if ( $1 eq "" ) { - $contents .= $blankline; - } else { - $contents .= $1 . "\n"; - } - } -} - -# -# STATE_INLINE: docbook comments within a prototype. -# -sub process_inline($$) { - my $file = shift; - - # First line (state 1) needs to be a @parameter - if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) { - $section = $1; - $contents = $2; - $new_start_line = $.; - if ($contents ne "") { - while (substr($contents, 0, 1) eq " ") { - $contents = substr($contents, 1); - } - $contents .= "\n"; - } - $inline_doc_state = STATE_INLINE_TEXT; - # Documentation block end */ - } elsif (/$doc_inline_end/) { - if (($contents ne "") && ($contents ne "\n")) { - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - } - $state = STATE_PROTO; - $inline_doc_state = STATE_INLINE_NA; - # Regular text - } elsif (/$doc_content/) { - if ($inline_doc_state == STATE_INLINE_TEXT) { - $contents .= $1 . "\n"; - # nuke leading blank lines - if ($contents =~ /^\s*$/) { - $contents = ""; - } - } elsif ($inline_doc_state == STATE_INLINE_NAME) { - $inline_doc_state = STATE_INLINE_ERROR; - emit_warning("${file}:$.", "Incorrect use of kernel-doc format: $_"); - } - } -} - - -sub process_file($) { - my $file; - my ($orig_file) = @_; - - $file = map_filename($orig_file); - - if (!open(IN_FILE,"<$file")) { - print STDERR "Error: Cannot open file $file\n"; - ++$errors; - return; - } - - $. = 1; - - $section_counter = 0; - while () { - while (!/^ \*/ && s/\\\s*$//) { - $_ .= ; - } - # Replace tabs by spaces - while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {}; - # Hand this line to the appropriate state handler - if ($state == STATE_NORMAL) { - process_normal(); - } elsif ($state == STATE_NAME) { - process_name($file, $_); - } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE || - $state == STATE_BODY_WITH_BLANK_LINE) { - process_body($file, $_); - } elsif ($state == STATE_INLINE) { # scanning for inline parameters - process_inline($file, $_); - } elsif ($state == STATE_PROTO) { - process_proto($file, $_); - } elsif ($state == STATE_DOCBLOCK) { - process_docblock($file, $_); - } - } - - # Make sure we got something interesting. - if (!$section_counter && $output_mode ne "none") { - if ($output_selection == OUTPUT_INCLUDE) { - emit_warning("${file}:1", "'$_' not found\n") - for keys %function_table; - } else { - emit_warning("${file}:1", "no structured comments found\n"); - } - } - close IN_FILE; -} - -$kernelversion = get_kernel_version(); - -# generate a sequence of code that will splice in highlighting information -# using the s// operator. -for (my $k = 0; $k < @highlights; $k++) { - my $pattern = $highlights[$k][0]; - my $result = $highlights[$k][1]; -# print STDERR "scanning pattern:$pattern, highlight:($result)\n"; - $dohighlight .= "\$contents =~ s:$pattern:$result:gs;\n"; -} - -if ($output_selection == OUTPUT_EXPORTED || - $output_selection == OUTPUT_INTERNAL) { - - push(@export_file_list, @ARGV); - - foreach (@export_file_list) { - chomp; - process_export_file($_); - } -} - -foreach (@ARGV) { - chomp; - process_file($_); -} -if ($verbose && $errors) { - print STDERR "$errors errors\n"; -} -if ($verbose && $warnings) { - print STDERR "$warnings warnings\n"; -} - -if ($Werror && $warnings) { - print STDERR "$warnings warnings as Errors\n"; - exit($warnings); -} else { - exit($output_mode eq "none" ? 0 : $errors) -} - -__END__ - -=head1 OPTIONS - -=head2 Output format selection (mutually exclusive): - -=over 8 - -=item -man - -Output troff manual page format. - -=item -rst - -Output reStructuredText format. This is the default. - -=item -none - -Do not output documentation, only warnings. - -=back - -=head2 Output format modifiers - -=head3 reStructuredText only - -=head2 Output selection (mutually exclusive): - -=over 8 - -=item -export - -Only output documentation for the symbols that have been exported using -EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. - -=item -internal - -Only output documentation for the symbols that have NOT been exported using -EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. - -=item -function NAME - -Only output documentation for the given function or DOC: section title. -All other functions and DOC: sections are ignored. - -May be specified multiple times. - -=item -nosymbol NAME - -Exclude the specified symbol from the output documentation. - -May be specified multiple times. - -=back - -=head2 Output selection modifiers: - -=over 8 - -=item -no-doc-sections - -Do not output DOC: sections. - -=item -export-file FILE - -Specify an additional FILE in which to look for EXPORT_SYMBOL information. - -To be used with -export or -internal. - -May be specified multiple times. - -=back - -=head3 reStructuredText only - -=over 8 - -=item -enable-lineno - -Enable output of .. LINENO lines. - -=back - -=head2 Other parameters: - -=over 8 - -=item -h, -help - -Print this help. - -=item -v - -Verbose output, more warnings and other information. - -=item -Werror - -Treat warnings as errors. - -=back - -=cut -- cgit v1.2.3 From b21f90e2e4503847ffeb00a9ef4d6d390291f902 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 29 Oct 2025 13:07:09 +0100 Subject: scripts: add tracepoint-update to the list of ignores files The new program for removing unused tracepoints is not ignored as it should. Add it to the local .gitignore. Cc: Vladimir Oltean Cc: Jacob Keller Cc: Jakub Kicinski Link: https://lore.kernel.org/20251029120709.24669-1-brgl@bgdev.pl Fixes: e30f8e61e251 ("tracing: Add a tracepoint verification check at build time") Signed-off-by: Bartosz Golaszewski Signed-off-by: Steven Rostedt (Google) --- scripts/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts') diff --git a/scripts/.gitignore b/scripts/.gitignore index c2ef68848da5..4215c2208f7e 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -11,4 +11,5 @@ /sign-file /sorttable /target.json +/tracepoint-update /unifdef -- cgit v1.2.3 From c4781dc3d1cf0e017e1f290607ddc56cfe187afc Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 20 Oct 2025 16:22:27 +0200 Subject: Kbuild: enable -fms-extensions Once in a while, it turns out that enabling -fms-extensions could allow some slightly prettier code. But every time it has come up, the code that had to be used instead has been deemed "not too awful" and not worth introducing another compiler flag for. That's probably true for each individual case, but then it's somewhat of a chicken/egg situation. If we just "bite the bullet" as Linus says and enable it once and for all, it is available whenever a use case turns up, and no individual case has to justify it. A lore.kernel.org search provides these examples: - https://lore.kernel.org/lkml/200706301813.58435.agruen@suse.de/ - https://lore.kernel.org/lkml/20180419152817.GD25406@bombadil.infradead.org/ - https://lore.kernel.org/lkml/170622208395.21664.2510213291504081000@noble.neil.brown.name/ - https://lore.kernel.org/lkml/87h6475w9q.fsf@prevas.dk/ - https://lore.kernel.org/lkml/CAHk-=wjeZwww6Zswn6F_iZTpUihTSNKYppLqj36iQDDhfntuEw@mail.gmail.com/ Undoubtedly, there are more places in the code where this could also be used but where -fms-extensions just didn't come up in any discussion. Signed-off-by: Rasmus Villemoes Acked-by: David Sterba Link: https://patch.msgid.link/20251020142228.1819871-2-linux@rasmusvillemoes.dk [nathan: Move disabled clang warning to scripts/Makefile.extrawarn and adjust comment] Signed-off-by: Nathan Chancellor --- Makefile | 3 +++ scripts/Makefile.extrawarn | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/Makefile b/Makefile index 17cfa11ca716..1dc8873d4968 100644 --- a/Makefile +++ b/Makefile @@ -1061,6 +1061,9 @@ NOSTDINC_FLAGS += -nostdinc # perform bounds checking. KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) +# Allow including a tagged struct or union anonymously in another struct/union. +KBUILD_CFLAGS += -fms-extensions + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6af392f9cd02..68e6fafcb80c 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -28,8 +28,10 @@ endif KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds ifdef CONFIG_CC_IS_CLANG -# The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable. +# The kernel builds with '-std=gnu11' and '-fms-extensions' so use of GNU and +# Microsoft extensions is acceptable. KBUILD_CFLAGS += -Wno-gnu +KBUILD_CFLAGS += -Wno-microsoft-anon-tag # Clang checks for overflow/truncation with '%p', while GCC does not: # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 -- cgit v1.2.3 From 04cadb4fe0341304741ef60a297366b553f0ce36 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Oct 2025 17:10:47 -0700 Subject: lib/crypto: Add FIPS self-tests for SHA-1 and SHA-2 Add FIPS cryptographic algorithm self-tests for all SHA-1 and SHA-2 algorithms. Following the "Implementation Guidance for FIPS 140-3" document, to achieve this it's sufficient to just test a single test vector for each of HMAC-SHA1, HMAC-SHA256, and HMAC-SHA512. Just run these tests in the initcalls, following the example of e.g. crypto/kdf_sp800108.c. Note that this should meet the FIPS self-test requirement even in the built-in case, given that the initcalls run before userspace, storage, network, etc. are accessible. This does not fix a regression, seeing as lib/ has had SHA-1 support since 2005 and SHA-256 support since 2018. Neither ever had FIPS self-tests. Moreover, fips=1 support has always been an unfinished feature upstream. However, with lib/ now being used more widely, it's now seeing more scrutiny and people seem to want these now [1][2]. [1] https://lore.kernel.org/r/3226361.1758126043@warthog.procyon.org.uk/ [2] https://lore.kernel.org/r/f31dbb22-0add-481c-aee0-e337a7731f8e@oracle.com/ Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251011001047.51886-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/fips.h | 38 +++++++++++++++++++++++++++++++++++++ lib/crypto/sha1.c | 19 ++++++++++++++++++- lib/crypto/sha256.c | 26 +++++++++++++++++++++---- lib/crypto/sha512.c | 19 ++++++++++++++++++- scripts/crypto/gen-fips-testvecs.py | 32 +++++++++++++++++++++++++++++++ 5 files changed, 128 insertions(+), 6 deletions(-) create mode 100644 lib/crypto/fips.h create mode 100755 scripts/crypto/gen-fips-testvecs.py (limited to 'scripts') diff --git a/lib/crypto/fips.h b/lib/crypto/fips.h new file mode 100644 index 000000000000..78a1bdd33a15 --- /dev/null +++ b/lib/crypto/fips.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* This file was generated by: gen-fips-testvecs.py */ + +#include + +static const u8 fips_test_data[] __initconst __maybe_unused = { + 0x66, 0x69, 0x70, 0x73, 0x20, 0x74, 0x65, 0x73, + 0x74, 0x20, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, +}; + +static const u8 fips_test_key[] __initconst __maybe_unused = { + 0x66, 0x69, 0x70, 0x73, 0x20, 0x74, 0x65, 0x73, + 0x74, 0x20, 0x6b, 0x65, 0x79, 0x00, 0x00, 0x00, +}; + +static const u8 fips_test_hmac_sha1_value[] __initconst __maybe_unused = { + 0x29, 0xa9, 0x88, 0xb8, 0x5c, 0xb4, 0xaf, 0x4b, + 0x97, 0x2a, 0xee, 0x87, 0x5b, 0x0a, 0x02, 0x55, + 0x99, 0xbf, 0x86, 0x78, +}; + +static const u8 fips_test_hmac_sha256_value[] __initconst __maybe_unused = { + 0x59, 0x25, 0x85, 0xcc, 0x40, 0xe9, 0x64, 0x2f, + 0xe9, 0xbf, 0x82, 0xb7, 0xd3, 0x15, 0x3d, 0x43, + 0x22, 0x0b, 0x4c, 0x00, 0x90, 0x14, 0x25, 0xcf, + 0x9e, 0x13, 0x2b, 0xc2, 0x30, 0xe6, 0xe8, 0x93, +}; + +static const u8 fips_test_hmac_sha512_value[] __initconst __maybe_unused = { + 0x6b, 0xea, 0x5d, 0x27, 0x49, 0x5b, 0x3f, 0xea, + 0xde, 0x2d, 0xfa, 0x32, 0x75, 0xdb, 0x77, 0xc8, + 0x26, 0xe9, 0x4e, 0x95, 0x4d, 0xad, 0x88, 0x02, + 0x87, 0xf9, 0x52, 0x0a, 0xd1, 0x92, 0x80, 0x1d, + 0x92, 0x7e, 0x3c, 0xbd, 0xb1, 0x3c, 0x49, 0x98, + 0x44, 0x9c, 0x8f, 0xee, 0x3f, 0x02, 0x71, 0x51, + 0x57, 0x0b, 0x15, 0x38, 0x95, 0xd8, 0xa3, 0x81, + 0xba, 0xb3, 0x15, 0x37, 0x5c, 0x6d, 0x57, 0x2b, +}; diff --git a/lib/crypto/sha1.c b/lib/crypto/sha1.c index 5904e4ae85d2..52788278cd17 100644 --- a/lib/crypto/sha1.c +++ b/lib/crypto/sha1.c @@ -12,6 +12,7 @@ #include #include #include +#include "fips.h" static const struct sha1_block_state sha1_iv = { .h = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, @@ -330,10 +331,26 @@ void hmac_sha1_usingrawkey(const u8 *raw_key, size_t raw_key_len, } EXPORT_SYMBOL_GPL(hmac_sha1_usingrawkey); -#ifdef sha1_mod_init_arch +#if defined(sha1_mod_init_arch) || defined(CONFIG_CRYPTO_FIPS) static int __init sha1_mod_init(void) { +#ifdef sha1_mod_init_arch sha1_mod_init_arch(); +#endif + if (fips_enabled) { + /* + * FIPS cryptographic algorithm self-test. As per the FIPS + * Implementation Guidance, testing HMAC-SHA1 satisfies the test + * requirement for SHA-1 too. + */ + u8 mac[SHA1_DIGEST_SIZE]; + + hmac_sha1_usingrawkey(fips_test_key, sizeof(fips_test_key), + fips_test_data, sizeof(fips_test_data), + mac); + if (memcmp(fips_test_hmac_sha1_value, mac, sizeof(mac)) != 0) + panic("sha1: FIPS self-test failed\n"); + } return 0; } subsys_initcall(sha1_mod_init); diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c index 881b935418ce..5d6b77e7e141 100644 --- a/lib/crypto/sha256.c +++ b/lib/crypto/sha256.c @@ -17,6 +17,7 @@ #include #include #include +#include "fips.h" static const struct sha256_block_state sha224_iv = { .h = { @@ -269,8 +270,8 @@ void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]) EXPORT_SYMBOL(sha256); /* - * Pre-boot environment (as indicated by __DISABLE_EXPORTS being defined) - * doesn't need either HMAC support or interleaved hashing support + * Pre-boot environments (as indicated by __DISABLE_EXPORTS being defined) just + * need the generic SHA-256 code. Omit all other features from them. */ #ifndef __DISABLE_EXPORTS @@ -477,12 +478,27 @@ void hmac_sha256_usingrawkey(const u8 *raw_key, size_t raw_key_len, hmac_sha256_final(&ctx, out); } EXPORT_SYMBOL_GPL(hmac_sha256_usingrawkey); -#endif /* !__DISABLE_EXPORTS */ -#ifdef sha256_mod_init_arch +#if defined(sha256_mod_init_arch) || defined(CONFIG_CRYPTO_FIPS) static int __init sha256_mod_init(void) { +#ifdef sha256_mod_init_arch sha256_mod_init_arch(); +#endif + if (fips_enabled) { + /* + * FIPS cryptographic algorithm self-test. As per the FIPS + * Implementation Guidance, testing HMAC-SHA256 satisfies the + * test requirement for SHA-224, SHA-256, and HMAC-SHA224 too. + */ + u8 mac[SHA256_DIGEST_SIZE]; + + hmac_sha256_usingrawkey(fips_test_key, sizeof(fips_test_key), + fips_test_data, sizeof(fips_test_data), + mac); + if (memcmp(fips_test_hmac_sha256_value, mac, sizeof(mac)) != 0) + panic("sha256: FIPS self-test failed\n"); + } return 0; } subsys_initcall(sha256_mod_init); @@ -493,5 +509,7 @@ static void __exit sha256_mod_exit(void) module_exit(sha256_mod_exit); #endif +#endif /* !__DISABLE_EXPORTS */ + MODULE_DESCRIPTION("SHA-224, SHA-256, HMAC-SHA224, and HMAC-SHA256 library functions"); MODULE_LICENSE("GPL"); diff --git a/lib/crypto/sha512.c b/lib/crypto/sha512.c index d8062188be98..605eab51aabd 100644 --- a/lib/crypto/sha512.c +++ b/lib/crypto/sha512.c @@ -17,6 +17,7 @@ #include #include #include +#include "fips.h" static const struct sha512_block_state sha384_iv = { .h = { @@ -405,10 +406,26 @@ void hmac_sha512_usingrawkey(const u8 *raw_key, size_t raw_key_len, } EXPORT_SYMBOL_GPL(hmac_sha512_usingrawkey); -#ifdef sha512_mod_init_arch +#if defined(sha512_mod_init_arch) || defined(CONFIG_CRYPTO_FIPS) static int __init sha512_mod_init(void) { +#ifdef sha512_mod_init_arch sha512_mod_init_arch(); +#endif + if (fips_enabled) { + /* + * FIPS cryptographic algorithm self-test. As per the FIPS + * Implementation Guidance, testing HMAC-SHA512 satisfies the + * test requirement for SHA-384, SHA-512, and HMAC-SHA384 too. + */ + u8 mac[SHA512_DIGEST_SIZE]; + + hmac_sha512_usingrawkey(fips_test_key, sizeof(fips_test_key), + fips_test_data, sizeof(fips_test_data), + mac); + if (memcmp(fips_test_hmac_sha512_value, mac, sizeof(mac)) != 0) + panic("sha512: FIPS self-test failed\n"); + } return 0; } subsys_initcall(sha512_mod_init); diff --git a/scripts/crypto/gen-fips-testvecs.py b/scripts/crypto/gen-fips-testvecs.py new file mode 100755 index 000000000000..2956f88b764a --- /dev/null +++ b/scripts/crypto/gen-fips-testvecs.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Script that generates lib/crypto/fips.h +# +# Copyright 2025 Google LLC + +import hmac + +fips_test_data = b"fips test data\0\0" +fips_test_key = b"fips test key\0\0\0" + +def print_static_u8_array_definition(name, value): + print('') + print(f'static const u8 {name}[] __initconst __maybe_unused = {{') + for i in range(0, len(value), 8): + line = '\t' + ''.join(f'0x{b:02x}, ' for b in value[i:i+8]) + print(f'{line.rstrip()}') + print('};') + +print('/* SPDX-License-Identifier: GPL-2.0-or-later */') +print(f'/* This file was generated by: gen-fips-testvecs.py */') +print() +print('#include ') + +print_static_u8_array_definition("fips_test_data", fips_test_data) +print_static_u8_array_definition("fips_test_key", fips_test_key) + +for alg in 'sha1', 'sha256', 'sha512': + ctx = hmac.new(fips_test_key, digestmod=alg) + ctx.update(fips_test_data) + print_static_u8_array_definition(f'fips_test_hmac_{alg}_value', ctx.digest()) -- cgit v1.2.3 From e5e7ca66a7fc6b8073c30a048e1157b88d427980 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 30 Oct 2025 12:58:32 -0700 Subject: docs: kdoc: fix duplicate section warning message The python version of the kernel-doc parser emits some strange warnings with just a line number in certain cases: $ ./scripts/kernel-doc -Wall -none 'include/linux/virtio_config.h' Warning: 174 Warning: 184 Warning: 190 Warning: include/linux/virtio_config.h:226 No description found for return value of '__virtio_test_bit' Warning: include/linux/virtio_config.h:259 No description found for return value of 'virtio_has_feature' Warning: include/linux/virtio_config.h:283 No description found for return value of 'virtio_has_dma_quirk' Warning: include/linux/virtio_config.h:392 No description found for return value of 'virtqueue_set_affinity' I eventually tracked this down to the lone call of emit_msg() in the KernelEntry class, which looks like: self.emit_msg(self.new_start_line, f"duplicate section name '{name}'\n") This looks like all the other emit_msg calls. Unfortunately, the definition within the KernelEntry class takes only a message parameter and not a line number. The intended message is passed as the warning! Pass the filename to the KernelEntry class, and use this to build the log message in the same way as the KernelDoc class does. To avoid future errors, mark the warning parameter for both emit_msg definitions as a keyword-only argument. This will prevent accidentally passing a string as the warning parameter in the future. Also fix the call in dump_section to avoid an unnecessary additional newline. Fixes: e3b42e94cf10 ("scripts/lib/kdoc/kdoc_parser.py: move kernel entry to a class") Signed-off-by: Jacob Keller Signed-off-by: Jonathan Corbet Message-ID: <20251030-jk-fix-kernel-doc-duplicate-return-warning-v2-1-ec4b5c662881@intel.com> --- scripts/lib/kdoc/kdoc_parser.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 6e5c115cbdf3..ee1a4ea6e725 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -275,6 +275,8 @@ class KernelEntry: self.leading_space = None + self.fname = fname + # State flags self.brcount = 0 self.declaration_start_line = ln + 1 @@ -289,9 +291,11 @@ class KernelEntry: return '\n'.join(self._contents) + '\n' # TODO: rename to emit_message after removal of kernel-doc.pl - def emit_msg(self, log_msg, warning=True): + def emit_msg(self, ln, msg, *, warning=True): """Emit a message""" + log_msg = f"{self.fname}:{ln} {msg}" + if not warning: self.config.log.info(log_msg) return @@ -337,7 +341,7 @@ class KernelEntry: # Only warn on user-specified duplicate section names if name != SECTION_DEFAULT: self.emit_msg(self.new_start_line, - f"duplicate section name '{name}'\n") + f"duplicate section name '{name}'") # Treat as a new paragraph - add a blank line self.sections[name] += '\n' + contents else: @@ -393,15 +397,15 @@ class KernelDoc: 'Python 3.7 or later is required for correct results') python_warning = True - def emit_msg(self, ln, msg, warning=True): + def emit_msg(self, ln, msg, *, warning=True): """Emit a message""" - log_msg = f"{self.fname}:{ln} {msg}" - if self.entry: - self.entry.emit_msg(log_msg, warning) + self.entry.emit_msg(ln, msg, warning=warning) return + log_msg = f"{self.fname}:{ln} {msg}" + if warning: self.config.log.warning(log_msg) else: -- cgit v1.2.3 From b36d4b6aa88ef039647228b98c59a875e92f8c8e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Oct 2025 13:20:33 +0100 Subject: arch: hookup listns() system call Add the listns() system call to all architectures. Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-20-2e6f823ebdc0@kernel.org Tested-by: syzbot@syzkaller.appspotmail.com Reviewed-by: Arnd Bergmann Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/tools/syscall_32.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/uapi/asm-generic/unistd.h | 4 +++- scripts/syscall.tbl | 1 + 18 files changed, 20 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 16dca28ebf17..3fed97478058 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -509,3 +509,4 @@ 577 common open_tree_attr sys_open_tree_attr 578 common file_getattr sys_file_getattr 579 common file_setattr sys_file_setattr +580 common listns sys_listns diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index b07e699aaa3c..fd09afae72a2 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -484,3 +484,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/arm64/tools/syscall_32.tbl b/arch/arm64/tools/syscall_32.tbl index 8d9088bc577d..8cdfe5d4dac9 100644 --- a/arch/arm64/tools/syscall_32.tbl +++ b/arch/arm64/tools/syscall_32.tbl @@ -481,3 +481,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index f41d38dfbf13..871a5d67bf41 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 580af574fe73..022fc85d94b3 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -475,3 +475,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index d824ffe9a014..8cedc83c3266 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -408,3 +408,4 @@ 467 n32 open_tree_attr sys_open_tree_attr 468 n32 file_getattr sys_file_getattr 469 n32 file_setattr sys_file_setattr +470 n32 listns sys_listns diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 7a7049c2c307..9b92bddf06b5 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -384,3 +384,4 @@ 467 n64 open_tree_attr sys_open_tree_attr 468 n64 file_getattr sys_file_getattr 469 n64 file_setattr sys_file_setattr +470 n64 listns sys_listns diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index d330274f0601..f810b8a55716 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -457,3 +457,4 @@ 467 o32 open_tree_attr sys_open_tree_attr 468 o32 file_getattr sys_file_getattr 469 o32 file_setattr sys_file_setattr +470 o32 listns sys_listns diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 88a788a7b18d..39bdacaa530b 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -468,3 +468,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index b453e80dfc00..ec4458cdb97b 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -560,3 +560,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 8a6744d658db..5863787ab036 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -472,3 +472,4 @@ 467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr sys_file_setattr +470 common listns sys_listns sys_listns diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 5e9c9eff5539..969c11325ade 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -473,3 +473,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index ebb7d06d1044..39aa26b6a50b 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -515,3 +515,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 4877e16da69a..e979a3eac7a3 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -475,3 +475,4 @@ 467 i386 open_tree_attr sys_open_tree_attr 468 i386 file_getattr sys_file_getattr 469 i386 file_setattr sys_file_setattr +470 i386 listns sys_listns diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ced2a1deecd7..8a4ac4841be6 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -394,6 +394,7 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 374e4cb788d8..438a3b170402 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 04e0077fb4c9..942370b3f5d2 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -857,9 +857,11 @@ __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) __SYSCALL(__NR_file_getattr, sys_file_getattr) #define __NR_file_setattr 469 __SYSCALL(__NR_file_setattr, sys_file_setattr) +#define __NR_listns 470 +__SYSCALL(__NR_listns, sys_listns) #undef __NR_syscalls -#define __NR_syscalls 470 +#define __NR_syscalls 471 /* * 32 bit systems traditionally used different diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index d1ae5e92c615..e74868be513c 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -410,3 +410,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns -- cgit v1.2.3 From 469c1c9eb6c9243e4b59ef93518d4e0acb1b2b3e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Nov 2025 22:55:02 +0100 Subject: kernel-doc: Issue warnings that were silently discarded When kernel-doc parses the sections for the documentation some errors may occur. In many cases the warning is simply stored to the current "entry" object. However, in the most of such cases this object gets discarded and there is no way for the output engine to even know about that. To avoid that, check if the "entry" is going to be discarded and if there warnings have been collected, issue them to the current logger as is and then flush the "entry". This fixes the problem that original Perl implementation doesn't have. As of Linux kernel v6.18-rc4 the reproducer can be: $ scripts/kernel-doc -v -none -Wall include/linux/util_macros.h ... Info: include/linux/util_macros.h:138 Scanning doc for function to_user_ptr ... while with the proposed change applied it gives one more line: $ scripts/kernel-doc -v -none -Wall include/linux/util_macros.h ... Info: include/linux/util_macros.h:138 Scanning doc for function to_user_ptr Warning: include/linux/util_macros.h:144 expecting prototype for to_user_ptr(). Prototype was for u64_to_user_ptr() instead ... And with the original Perl script: $ scripts/kernel-doc.pl -v -none -Wall include/linux/util_macros.h ... include/linux/util_macros.h:139: info: Scanning doc for function to_user_ptr include/linux/util_macros.h:149: warning: expecting prototype for to_user_ptr(). Prototype was for u64_to_user_ptr() instead ... Fixes: 9cbc2d3b137b ("scripts/kernel-doc.py: postpone warnings to the output plugin") Signed-off-by: Andy Shevchenko Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Jonathan Corbet Message-ID: <20251104215502.1049817-1-andriy.shevchenko@linux.intel.com> --- scripts/lib/kdoc/kdoc_parser.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index ee1a4ea6e725..f7dbb0868367 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -451,6 +451,13 @@ class KernelDoc: variables used by the state machine. """ + # + # Flush the warnings out before we proceed further + # + if self.entry and self.entry not in self.entries: + for log_msg in self.entry.warnings: + self.config.log.warning(log_msg) + self.entry = KernelEntry(self.config, self.fname, ln) # State flags -- cgit v1.2.3 From 6fa873641c0bdfa849130a81aa7339ccfd42b52a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 25 Oct 2025 22:50:24 -0700 Subject: lib/crypto: sha3: Add FIPS cryptographic algorithm self-test Since the SHA-3 algorithms are FIPS-approved, add the boot-time self-test which is apparently required. This closely follows the corresponding SHA-1, SHA-256, and SHA-512 tests. Tested-by: Harald Freudenberger Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251026055032.1413733-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/fips.h | 7 +++++++ lib/crypto/sha3.c | 17 ++++++++++++++++- scripts/crypto/gen-fips-testvecs.py | 4 ++++ 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/lib/crypto/fips.h b/lib/crypto/fips.h index 78a1bdd33a15..023410c2e0db 100644 --- a/lib/crypto/fips.h +++ b/lib/crypto/fips.h @@ -36,3 +36,10 @@ static const u8 fips_test_hmac_sha512_value[] __initconst __maybe_unused = { 0x57, 0x0b, 0x15, 0x38, 0x95, 0xd8, 0xa3, 0x81, 0xba, 0xb3, 0x15, 0x37, 0x5c, 0x6d, 0x57, 0x2b, }; + +static const u8 fips_test_sha3_256_value[] __initconst __maybe_unused = { + 0x77, 0xc4, 0x8b, 0x69, 0x70, 0x5f, 0x0a, 0xb1, + 0xb1, 0xa5, 0x82, 0x0a, 0x22, 0x2b, 0x49, 0x31, + 0xba, 0x9b, 0xb6, 0xaa, 0x32, 0xa7, 0x97, 0x00, + 0x98, 0xdb, 0xff, 0xe7, 0xc6, 0xde, 0xb5, 0x82, +}; diff --git a/lib/crypto/sha3.c b/lib/crypto/sha3.c index 2102c2ecac96..8434f9bff138 100644 --- a/lib/crypto/sha3.c +++ b/lib/crypto/sha3.c @@ -17,6 +17,7 @@ #include #include #include +#include "fips.h" /* * On some 32-bit architectures, such as h8300, GCC ends up using over 1 KB of @@ -341,10 +342,24 @@ void shake256(const u8 *in, size_t in_len, u8 *out, size_t out_len) } EXPORT_SYMBOL_GPL(shake256); -#ifdef sha3_mod_init_arch +#if defined(sha3_mod_init_arch) || defined(CONFIG_CRYPTO_FIPS) static int __init sha3_mod_init(void) { +#ifdef sha3_mod_init_arch sha3_mod_init_arch(); +#endif + if (fips_enabled) { + /* + * FIPS cryptographic algorithm self-test. As per the FIPS + * Implementation Guidance, testing any SHA-3 algorithm + * satisfies the test requirement for all of them. + */ + u8 hash[SHA3_256_DIGEST_SIZE]; + + sha3_256(fips_test_data, sizeof(fips_test_data), hash); + if (memcmp(fips_test_sha3_256_value, hash, sizeof(hash)) != 0) + panic("sha3: FIPS self-test failed\n"); + } return 0; } subsys_initcall(sha3_mod_init); diff --git a/scripts/crypto/gen-fips-testvecs.py b/scripts/crypto/gen-fips-testvecs.py index 2956f88b764a..db873f88619a 100755 --- a/scripts/crypto/gen-fips-testvecs.py +++ b/scripts/crypto/gen-fips-testvecs.py @@ -5,6 +5,7 @@ # # Copyright 2025 Google LLC +import hashlib import hmac fips_test_data = b"fips test data\0\0" @@ -30,3 +31,6 @@ for alg in 'sha1', 'sha256', 'sha512': ctx = hmac.new(fips_test_key, digestmod=alg) ctx.update(fips_test_data) print_static_u8_array_definition(f'fips_test_hmac_{alg}_value', ctx.digest()) + +print_static_u8_array_definition(f'fips_test_sha3_256_value', + hashlib.sha3_256(fips_test_data).digest()) -- cgit v1.2.3 From af61da281f52aba0c5b090bafb3a31c5739850ff Mon Sep 17 00:00:00 2001 From: Mikhail Malyshev Date: Wed, 15 Oct 2025 16:34:52 +0000 Subject: kbuild: Use objtree for module signing key path When building out-of-tree modules with CONFIG_MODULE_SIG_FORCE=y, module signing fails because the private key path uses $(srctree) while the public key path uses $(objtree). Since signing keys are generated in the build directory during kernel compilation, both paths should use $(objtree) for consistency. This causes SSL errors like: SSL error:02001002:system library:fopen:No such file or directory sign-file: /kernel-src/certs/signing_key.pem The issue occurs because: - sig-key uses: $(srctree)/certs/signing_key.pem (source tree) - cmd_sign uses: $(objtree)/certs/signing_key.x509 (build tree) But both keys are generated in $(objtree) during the build. This complements commit 25ff08aa43e37 ("kbuild: Fix signing issue for external modules") which fixed the scripts path and public key path, but missed the private key path inconsistency. Fixes out-of-tree module signing for configurations with separate source and build directories (e.g., O=/kernel-out). Signed-off-by: Mikhail Malyshev Reviewed-by: Nathan Chancellor Tested-by: Nicolas Schier Link: https://patch.msgid.link/20251015163452.3754286-1-mike.malyshev@gmail.com Signed-off-by: Nicolas Schier --- scripts/Makefile.modinst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 1628198f3e83..9ba45e5b32b1 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -100,7 +100,7 @@ endif # Don't stop modules_install even if we can't sign external modules. # ifeq ($(filter pkcs11:%, $(CONFIG_MODULE_SIG_KEY)),) -sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(srctree)/)$(CONFIG_MODULE_SIG_KEY) +sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(objtree)/)$(CONFIG_MODULE_SIG_KEY) else sig-key := $(CONFIG_MODULE_SIG_KEY) endif -- cgit v1.2.3 From 7319256dda306b867506899b6438e6eb96a1ead0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 23 Oct 2025 22:01:36 +0200 Subject: kbuild: Rename Makefile.extrawarn to Makefile.warn Since commit e88ca24319e4 ("kbuild: consolidate warning flags in scripts/Makefile.extrawarn"), scripts/Makefile.extrawarn contains all warnings for the main kernel build, not just warnings enabled by the values for W=. Rename it to scripts/Makefile.warn to make it clearer that this Makefile is where all Kbuild warning handling should exist. Signed-off-by: Nathan Chancellor Acked-by: Arnd Bergmann Link: https://patch.msgid.link/20251023-rename-scripts-makefile-extrawarn-v1-1-8f7531542169@kernel.org Signed-off-by: Nicolas Schier --- Makefile | 2 +- drivers/gpu/drm/Makefile | 2 +- scripts/Makefile.extrawarn | 235 --------------------------------------------- scripts/Makefile.warn | 235 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 237 insertions(+), 237 deletions(-) delete mode 100644 scripts/Makefile.extrawarn create mode 100644 scripts/Makefile.warn (limited to 'scripts') diff --git a/Makefile b/Makefile index 79b631075d33..96ca0f17d158 100644 --- a/Makefile +++ b/Makefile @@ -1084,7 +1084,7 @@ KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=) endif # include additional Makefiles when needed -include-y := scripts/Makefile.extrawarn +include-y := scripts/Makefile.warn include-$(CONFIG_DEBUG_INFO) += scripts/Makefile.debug include-$(CONFIG_DEBUG_INFO_BTF)+= scripts/Makefile.btf include-$(CONFIG_KASAN) += scripts/Makefile.kasan diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 4b2f7d794275..d1a102c4b80a 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -6,7 +6,7 @@ CFLAGS-$(CONFIG_DRM_USE_DYNAMIC_DEBUG) += -DDYNAMIC_DEBUG_MODULE # Unconditionally enable W=1 warnings locally -# --- begin copy-paste W=1 warnings from scripts/Makefile.extrawarn +# --- begin copy-paste W=1 warnings from scripts/Makefile.warn subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter subdir-ccflags-y += $(call cc-option, -Wrestrict) subdir-ccflags-y += -Wmissing-format-attribute diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn deleted file mode 100644 index 68e6fafcb80c..000000000000 --- a/scripts/Makefile.extrawarn +++ /dev/null @@ -1,235 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# ========================================================================== -# make W=... settings -# -# There are four warning groups enabled by W=1, W=2, W=3, and W=e -# They are independent, and can be combined like W=12 or W=123e. -# ========================================================================== - -# Default set of warnings, always enabled -KBUILD_CFLAGS += -Wall -KBUILD_CFLAGS += -Wextra -KBUILD_CFLAGS += -Wundef -KBUILD_CFLAGS += -Werror=implicit-function-declaration -KBUILD_CFLAGS += -Werror=implicit-int -KBUILD_CFLAGS += -Werror=return-type -KBUILD_CFLAGS += -Werror=strict-prototypes -KBUILD_CFLAGS += -Wno-format-security -KBUILD_CFLAGS += -Wno-trigraphs -KBUILD_CFLAGS += $(call cc-option, -Wno-frame-address) -KBUILD_CFLAGS += $(call cc-option, -Wno-address-of-packed-member) -KBUILD_CFLAGS += -Wmissing-declarations -KBUILD_CFLAGS += -Wmissing-prototypes - -ifneq ($(CONFIG_FRAME_WARN),0) -KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN) -endif - -KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds - -ifdef CONFIG_CC_IS_CLANG -# The kernel builds with '-std=gnu11' and '-fms-extensions' so use of GNU and -# Microsoft extensions is acceptable. -KBUILD_CFLAGS += -Wno-gnu -KBUILD_CFLAGS += -Wno-microsoft-anon-tag - -# Clang checks for overflow/truncation with '%p', while GCC does not: -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 -KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow-non-kprintf) -KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation-non-kprintf) - -# Clang may emit a warning when a const variable, such as the dummy variables -# in typecheck(), or const member of an aggregate type are not initialized, -# which can result in unexpected behavior. However, in many audited cases of -# the "field" variant of the warning, this is intentional because the field is -# never used within a particular call path, the field is within a union with -# other non-const members, or the containing object is not const so the field -# can be modified via memcpy() / memset(). While the variable warning also gets -# disabled with this same switch, there should not be too much coverage lost -# because -Wuninitialized will still flag when an uninitialized const variable -# is used. -KBUILD_CFLAGS += $(call cc-option, -Wno-default-const-init-unsafe) -else - -# gcc inanely warns about local variables called 'main' -KBUILD_CFLAGS += -Wno-main -endif - -# These result in bogus false positives -KBUILD_CFLAGS += $(call cc-option, -Wno-dangling-pointer) - -# Stack Variable Length Arrays (VLAs) must not be used in the kernel. -# Function array parameters should, however, be usable, but -Wvla will -# warn for those. Clang has no way yet to distinguish between the VLA -# types, so depend on GCC for now to keep stack VLAs out of the tree. -# https://github.com/llvm/llvm-project/issues/57098 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98217 -KBUILD_CFLAGS += $(call cc-option,-Wvla-larger-than=1) - -# disable pointer signed / unsigned warnings in gcc 4.0 -KBUILD_CFLAGS += -Wno-pointer-sign - -# In order to make sure new function cast mismatches are not introduced -# in the kernel (to avoid tripping CFI checking), the kernel should be -# globally built with -Wcast-function-type. -KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type) - -# Currently, disable -Wstringop-overflow for GCC 11, globally. -KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) -KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) - -# Currently, disable -Wunterminated-string-initialization as broken -KBUILD_CFLAGS += $(call cc-option, -Wno-unterminated-string-initialization) - -# The allocators already balk at large sizes, so silence the compiler -# warnings for bounds checks involving those possible values. While -# -Wno-alloc-size-larger-than would normally be used here, earlier versions -# of gcc (<9.1) weirdly don't handle the option correctly when _other_ -# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX -# doesn't work (as it is documented to), silently resolving to "0" prior to -# version 9.1 (and producing an error more recently). Numeric values larger -# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently -# ignored, continuing to default to PTRDIFF_MAX. So, left with no other -# choice, we must perform a versioned check to disable this warning. -# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au -KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than -KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH) - -# Prohibit date/time macros, which would make the build non-deterministic -KBUILD_CFLAGS += -Werror=date-time - -# enforce correct pointer usage -KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) - -# Require designated initializers for all marked structures -KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) - -# Warn if there is an enum types mismatch -KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) - -KBUILD_CFLAGS += -Wunused - -# -# W=1 - warnings which may be relevant and do not occur too often -# -ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),) - -KBUILD_CFLAGS += -Wmissing-format-attribute -KBUILD_CFLAGS += -Wmissing-include-dirs -KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) - -KBUILD_CPPFLAGS += -Wundef -KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 - -else - -# Some diagnostics enabled by default are noisy. -# Suppress them by using -Wno... except for W=1. -KBUILD_CFLAGS += $(call cc-option, -Wno-unused-but-set-variable) -KBUILD_CFLAGS += $(call cc-option, -Wno-unused-const-variable) -KBUILD_CFLAGS += $(call cc-option, -Wno-packed-not-aligned) -KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow) -ifdef CONFIG_CC_IS_GCC -KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation) -endif -KBUILD_CFLAGS += $(call cc-option, -Wno-stringop-truncation) - -KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang - -ifdef CONFIG_CC_IS_CLANG -# Clang before clang-16 would warn on default argument promotions. -ifneq ($(call clang-min-version, 160000),y) -# Disable -Wformat -KBUILD_CFLAGS += -Wno-format -# Then re-enable flags that were part of the -Wformat group that aren't -# problematic. -KBUILD_CFLAGS += -Wformat-extra-args -Wformat-invalid-specifier -KBUILD_CFLAGS += -Wformat-zero-length -Wnonnull -# Requires clang-12+. -ifeq ($(call clang-min-version, 120000),y) -KBUILD_CFLAGS += -Wformat-insufficient-args -endif -endif -KBUILD_CFLAGS += $(call cc-option, -Wno-pointer-to-enum-cast) -KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare -KBUILD_CFLAGS += $(call cc-option, -Wno-unaligned-access) -KBUILD_CFLAGS += -Wno-enum-compare-conditional -endif - -endif - -# -# W=2 - warnings which occur quite often but may still be relevant -# -ifneq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) - -KBUILD_CFLAGS += -Wdisabled-optimization -KBUILD_CFLAGS += -Wshadow -KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) -KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) - -KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 - -else - -# The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-missing-field-initializers -KBUILD_CFLAGS += -Wno-type-limits -KBUILD_CFLAGS += -Wno-shift-negative-value - -ifdef CONFIG_CC_IS_CLANG -KBUILD_CFLAGS += -Wno-enum-enum-conversion -endif - -ifdef CONFIG_CC_IS_GCC -KBUILD_CFLAGS += -Wno-maybe-uninitialized -endif - -endif - -# -# W=3 - more obscure warnings, can most likely be ignored -# -ifneq ($(findstring 3, $(KBUILD_EXTRA_WARN)),) - -KBUILD_CFLAGS += -Wbad-function-cast -KBUILD_CFLAGS += -Wcast-align -KBUILD_CFLAGS += -Wcast-qual -KBUILD_CFLAGS += -Wconversion -KBUILD_CFLAGS += -Wpacked -KBUILD_CFLAGS += -Wpadded -KBUILD_CFLAGS += -Wpointer-arith -KBUILD_CFLAGS += -Wredundant-decls -KBUILD_CFLAGS += -Wsign-compare -KBUILD_CFLAGS += -Wswitch-default - -KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN3 - -else - -# The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-unused-parameter - -endif - -# -# W=e and CONFIG_WERROR - error out on warnings -# -ifneq ($(findstring e, $(KBUILD_EXTRA_WARN))$(CONFIG_WERROR),) - -KBUILD_CPPFLAGS += -Werror -KBUILD_AFLAGS += -Wa,--fatal-warnings -KBUILD_LDFLAGS += --fatal-warnings -KBUILD_USERCFLAGS += -Werror -KBUILD_USERLDFLAGS += -Wl,--fatal-warnings -KBUILD_RUSTFLAGS += -Dwarnings - -# While hostprog flags are used during build bootstrapping (thus should not -# depend on CONFIG_ symbols), -Werror is disruptive and should be opted into. -# Only apply -Werror to hostprogs built after the initial Kconfig stage. -KBUILD_HOSTCFLAGS += -Werror -KBUILD_HOSTLDFLAGS += -Wl,--fatal-warnings -KBUILD_HOSTRUSTFLAGS += -Dwarnings - -endif diff --git a/scripts/Makefile.warn b/scripts/Makefile.warn new file mode 100644 index 000000000000..68e6fafcb80c --- /dev/null +++ b/scripts/Makefile.warn @@ -0,0 +1,235 @@ +# SPDX-License-Identifier: GPL-2.0 +# ========================================================================== +# make W=... settings +# +# There are four warning groups enabled by W=1, W=2, W=3, and W=e +# They are independent, and can be combined like W=12 or W=123e. +# ========================================================================== + +# Default set of warnings, always enabled +KBUILD_CFLAGS += -Wall +KBUILD_CFLAGS += -Wextra +KBUILD_CFLAGS += -Wundef +KBUILD_CFLAGS += -Werror=implicit-function-declaration +KBUILD_CFLAGS += -Werror=implicit-int +KBUILD_CFLAGS += -Werror=return-type +KBUILD_CFLAGS += -Werror=strict-prototypes +KBUILD_CFLAGS += -Wno-format-security +KBUILD_CFLAGS += -Wno-trigraphs +KBUILD_CFLAGS += $(call cc-option, -Wno-frame-address) +KBUILD_CFLAGS += $(call cc-option, -Wno-address-of-packed-member) +KBUILD_CFLAGS += -Wmissing-declarations +KBUILD_CFLAGS += -Wmissing-prototypes + +ifneq ($(CONFIG_FRAME_WARN),0) +KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN) +endif + +KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds + +ifdef CONFIG_CC_IS_CLANG +# The kernel builds with '-std=gnu11' and '-fms-extensions' so use of GNU and +# Microsoft extensions is acceptable. +KBUILD_CFLAGS += -Wno-gnu +KBUILD_CFLAGS += -Wno-microsoft-anon-tag + +# Clang checks for overflow/truncation with '%p', while GCC does not: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 +KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow-non-kprintf) +KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation-non-kprintf) + +# Clang may emit a warning when a const variable, such as the dummy variables +# in typecheck(), or const member of an aggregate type are not initialized, +# which can result in unexpected behavior. However, in many audited cases of +# the "field" variant of the warning, this is intentional because the field is +# never used within a particular call path, the field is within a union with +# other non-const members, or the containing object is not const so the field +# can be modified via memcpy() / memset(). While the variable warning also gets +# disabled with this same switch, there should not be too much coverage lost +# because -Wuninitialized will still flag when an uninitialized const variable +# is used. +KBUILD_CFLAGS += $(call cc-option, -Wno-default-const-init-unsafe) +else + +# gcc inanely warns about local variables called 'main' +KBUILD_CFLAGS += -Wno-main +endif + +# These result in bogus false positives +KBUILD_CFLAGS += $(call cc-option, -Wno-dangling-pointer) + +# Stack Variable Length Arrays (VLAs) must not be used in the kernel. +# Function array parameters should, however, be usable, but -Wvla will +# warn for those. Clang has no way yet to distinguish between the VLA +# types, so depend on GCC for now to keep stack VLAs out of the tree. +# https://github.com/llvm/llvm-project/issues/57098 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98217 +KBUILD_CFLAGS += $(call cc-option,-Wvla-larger-than=1) + +# disable pointer signed / unsigned warnings in gcc 4.0 +KBUILD_CFLAGS += -Wno-pointer-sign + +# In order to make sure new function cast mismatches are not introduced +# in the kernel (to avoid tripping CFI checking), the kernel should be +# globally built with -Wcast-function-type. +KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type) + +# Currently, disable -Wstringop-overflow for GCC 11, globally. +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) + +# Currently, disable -Wunterminated-string-initialization as broken +KBUILD_CFLAGS += $(call cc-option, -Wno-unterminated-string-initialization) + +# The allocators already balk at large sizes, so silence the compiler +# warnings for bounds checks involving those possible values. While +# -Wno-alloc-size-larger-than would normally be used here, earlier versions +# of gcc (<9.1) weirdly don't handle the option correctly when _other_ +# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX +# doesn't work (as it is documented to), silently resolving to "0" prior to +# version 9.1 (and producing an error more recently). Numeric values larger +# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently +# ignored, continuing to default to PTRDIFF_MAX. So, left with no other +# choice, we must perform a versioned check to disable this warning. +# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au +KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than +KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH) + +# Prohibit date/time macros, which would make the build non-deterministic +KBUILD_CFLAGS += -Werror=date-time + +# enforce correct pointer usage +KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) + +# Require designated initializers for all marked structures +KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) + +# Warn if there is an enum types mismatch +KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) + +KBUILD_CFLAGS += -Wunused + +# +# W=1 - warnings which may be relevant and do not occur too often +# +ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),) + +KBUILD_CFLAGS += -Wmissing-format-attribute +KBUILD_CFLAGS += -Wmissing-include-dirs +KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) + +KBUILD_CPPFLAGS += -Wundef +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 + +else + +# Some diagnostics enabled by default are noisy. +# Suppress them by using -Wno... except for W=1. +KBUILD_CFLAGS += $(call cc-option, -Wno-unused-but-set-variable) +KBUILD_CFLAGS += $(call cc-option, -Wno-unused-const-variable) +KBUILD_CFLAGS += $(call cc-option, -Wno-packed-not-aligned) +KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow) +ifdef CONFIG_CC_IS_GCC +KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation) +endif +KBUILD_CFLAGS += $(call cc-option, -Wno-stringop-truncation) + +KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang + +ifdef CONFIG_CC_IS_CLANG +# Clang before clang-16 would warn on default argument promotions. +ifneq ($(call clang-min-version, 160000),y) +# Disable -Wformat +KBUILD_CFLAGS += -Wno-format +# Then re-enable flags that were part of the -Wformat group that aren't +# problematic. +KBUILD_CFLAGS += -Wformat-extra-args -Wformat-invalid-specifier +KBUILD_CFLAGS += -Wformat-zero-length -Wnonnull +# Requires clang-12+. +ifeq ($(call clang-min-version, 120000),y) +KBUILD_CFLAGS += -Wformat-insufficient-args +endif +endif +KBUILD_CFLAGS += $(call cc-option, -Wno-pointer-to-enum-cast) +KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare +KBUILD_CFLAGS += $(call cc-option, -Wno-unaligned-access) +KBUILD_CFLAGS += -Wno-enum-compare-conditional +endif + +endif + +# +# W=2 - warnings which occur quite often but may still be relevant +# +ifneq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) + +KBUILD_CFLAGS += -Wdisabled-optimization +KBUILD_CFLAGS += -Wshadow +KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) +KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 + +else + +# The following turn off the warnings enabled by -Wextra +KBUILD_CFLAGS += -Wno-missing-field-initializers +KBUILD_CFLAGS += -Wno-type-limits +KBUILD_CFLAGS += -Wno-shift-negative-value + +ifdef CONFIG_CC_IS_CLANG +KBUILD_CFLAGS += -Wno-enum-enum-conversion +endif + +ifdef CONFIG_CC_IS_GCC +KBUILD_CFLAGS += -Wno-maybe-uninitialized +endif + +endif + +# +# W=3 - more obscure warnings, can most likely be ignored +# +ifneq ($(findstring 3, $(KBUILD_EXTRA_WARN)),) + +KBUILD_CFLAGS += -Wbad-function-cast +KBUILD_CFLAGS += -Wcast-align +KBUILD_CFLAGS += -Wcast-qual +KBUILD_CFLAGS += -Wconversion +KBUILD_CFLAGS += -Wpacked +KBUILD_CFLAGS += -Wpadded +KBUILD_CFLAGS += -Wpointer-arith +KBUILD_CFLAGS += -Wredundant-decls +KBUILD_CFLAGS += -Wsign-compare +KBUILD_CFLAGS += -Wswitch-default + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN3 + +else + +# The following turn off the warnings enabled by -Wextra +KBUILD_CFLAGS += -Wno-sign-compare +KBUILD_CFLAGS += -Wno-unused-parameter + +endif + +# +# W=e and CONFIG_WERROR - error out on warnings +# +ifneq ($(findstring e, $(KBUILD_EXTRA_WARN))$(CONFIG_WERROR),) + +KBUILD_CPPFLAGS += -Werror +KBUILD_AFLAGS += -Wa,--fatal-warnings +KBUILD_LDFLAGS += --fatal-warnings +KBUILD_USERCFLAGS += -Werror +KBUILD_USERLDFLAGS += -Wl,--fatal-warnings +KBUILD_RUSTFLAGS += -Dwarnings + +# While hostprog flags are used during build bootstrapping (thus should not +# depend on CONFIG_ symbols), -Werror is disruptive and should be opted into. +# Only apply -Werror to hostprogs built after the initial Kconfig stage. +KBUILD_HOSTCFLAGS += -Werror +KBUILD_HOSTLDFLAGS += -Wl,--fatal-warnings +KBUILD_HOSTRUSTFLAGS += -Dwarnings + +endif -- cgit v1.2.3 From 9362d34acf91a706c543d919ade3e651b9bd2d6f Mon Sep 17 00:00:00 2001 From: Pat Somaru Date: Tue, 7 Oct 2025 20:45:28 -0400 Subject: scripts/clang-tools: Handle included .c files in gen_compile_commands The gen_compile_commands.py script currently only creates entries for the primary source files found in .cmd files, but some kernel source files text-include others (i.e. kernel/sched/build_policy.c). This prevents tools like clangd from working properly on text-included c files, such as kernel/sched/ext.c because the generated compile_commands.json does not have entries for them. Extend process_line() to detect when a source file includes .c files, and generate additional compile_commands.json entries for them. For included c files, use the same compile flags as their parent and add their parents headers. This enables lsp tools like clangd to work properly on files like kernel/sched/ext.c Signed-off-by: Pat Somaru Reviewed-by: Nathan Chancellor Tested-by: Justin Stitt Tested-by: Eduard Zingerman Link: https://patch.msgid.link/20251008004615.2690081-1-patso@likewhatevs.io Signed-off-by: Nicolas Schier --- scripts/clang-tools/gen_compile_commands.py | 135 ++++++++++++++++++++++++++-- 1 file changed, 128 insertions(+), 7 deletions(-) (limited to 'scripts') diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index 96e6e46ad1a7..6f4afa92a466 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -21,6 +21,12 @@ _DEFAULT_LOG_LEVEL = 'WARNING' _FILENAME_PATTERN = r'^\..*\.cmd$' _LINE_PATTERN = r'^(saved)?cmd_[^ ]*\.o := (?P.* )(?P[^ ]*\.[cS]) *(;|$)' _VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] + +# Pre-compiled regexes for better performance +_INCLUDE_PATTERN = re.compile(r'^\s*#\s*include\s*[<"]([^>"]*)[>"]') +_C_INCLUDE_PATTERN = re.compile(r'^\s*#\s*include\s*"([^"]*\.c)"\s*$') +_FILENAME_MATCHER = re.compile(_FILENAME_PATTERN) + # The tools/ directory adopts a different build system, and produces .cmd # files in a different format. Do not support it. _EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools'] @@ -82,7 +88,6 @@ def cmdfiles_in_dir(directory): The path to a .cmd file. """ - filename_matcher = re.compile(_FILENAME_PATTERN) exclude_dirs = [ os.path.join(directory, d) for d in _EXCLUDE_DIRS ] for dirpath, dirnames, filenames in os.walk(directory, topdown=True): @@ -92,7 +97,7 @@ def cmdfiles_in_dir(directory): continue for filename in filenames: - if filename_matcher.match(filename): + if _FILENAME_MATCHER.match(filename): yield os.path.join(dirpath, filename) @@ -149,8 +154,87 @@ def cmdfiles_for_modorder(modorder): yield to_cmdfile(mod_line.rstrip()) +def extract_includes_from_file(source_file, root_directory): + """Extract #include statements from a C file. + + Args: + source_file: Path to the source .c file to analyze + root_directory: Root directory for resolving relative paths + + Returns: + List of header files that should be included (without quotes/brackets) + """ + includes = [] + if not os.path.exists(source_file): + return includes + + try: + with open(source_file, 'r') as f: + for line in f: + line = line.strip() + # Look for #include statements. + # Match both #include "header.h" and #include . + match = _INCLUDE_PATTERN.match(line) + if match: + header = match.group(1) + # Skip including other .c files to avoid circular includes. + if not header.endswith('.c'): + # For relative includes (quoted), resolve path relative to source file. + if '"' in line: + src_dir = os.path.dirname(source_file) + header_path = os.path.join(src_dir, header) + if os.path.exists(header_path): + rel_header = os.path.relpath(header_path, root_directory) + includes.append(rel_header) + else: + includes.append(header) + else: + # System include like . + includes.append(header) + except IOError: + pass + + return includes + + +def find_included_c_files(source_file, root_directory): + """Find .c files that are included by the given source file. + + Args: + source_file: Path to the source .c file + root_directory: Root directory for resolving relative paths + + Yields: + Full paths to included .c files + """ + if not os.path.exists(source_file): + return + + try: + with open(source_file, 'r') as f: + for line in f: + line = line.strip() + # Look for #include "*.c" patterns. + match = _C_INCLUDE_PATTERN.match(line) + if match: + included_file = match.group(1) + # Handle relative paths. + if not os.path.isabs(included_file): + src_dir = os.path.dirname(source_file) + included_file = os.path.join(src_dir, included_file) + + # Normalize the path. + included_file = os.path.normpath(included_file) + + # Check if the file exists. + if os.path.exists(included_file): + yield included_file + except IOError: + pass + + def process_line(root_directory, command_prefix, file_path): - """Extracts information from a .cmd line and creates an entry from it. + """Extracts information from a .cmd line and creates entries from it. Args: root_directory: The directory that was searched for .cmd files. Usually @@ -160,7 +244,8 @@ def process_line(root_directory, command_prefix, file_path): Usually relative to root_directory, but sometimes absolute. Returns: - An entry to append to compile_commands. + A list of entries to append to compile_commands (may include multiple + entries if the source file includes other .c files). Raises: ValueError: Could not find the extracted file based on file_path and @@ -176,11 +261,47 @@ def process_line(root_directory, command_prefix, file_path): abs_path = os.path.realpath(os.path.join(root_directory, file_path)) if not os.path.exists(abs_path): raise ValueError('File %s not found' % abs_path) - return { + + entries = [] + + # Create entry for the main source file. + main_entry = { 'directory': root_directory, 'file': abs_path, 'command': prefix + file_path, } + entries.append(main_entry) + + # Find and create entries for included .c files. + for included_c_file in find_included_c_files(abs_path, root_directory): + # For included .c files, create a compilation command that: + # 1. Uses the same compilation flags as the parent file + # 2. But compiles the included file directly (not the parent) + # 3. Includes necessary headers from the parent file for proper macro resolution + + # Convert absolute path to relative for the command. + rel_path = os.path.relpath(included_c_file, root_directory) + + # Extract includes from the parent file to provide proper compilation context. + extra_includes = '' + try: + parent_includes = extract_includes_from_file(abs_path, root_directory) + if parent_includes: + extra_includes = ' ' + ' '.join('-include ' + inc for inc in parent_includes) + except IOError: + pass + + included_entry = { + 'directory': root_directory, + 'file': included_c_file, + # Use the same compilation prefix but target the included file directly. + # Add extra headers for proper macro resolution. + 'command': prefix + extra_includes + ' ' + rel_path, + } + entries.append(included_entry) + logging.debug('Added entry for included file: %s', included_c_file) + + return entries def main(): @@ -213,9 +334,9 @@ def main(): result = line_matcher.match(f.readline()) if result: try: - entry = process_line(directory, result.group('command_prefix'), + entries = process_line(directory, result.group('command_prefix'), result.group('file_path')) - compile_commands.append(entry) + compile_commands.extend(entries) except ValueError as err: logging.info('Could not add line from %s: %s', cmdfile, err) -- cgit v1.2.3 From 7bade3f7e91969985149a66c98ef0d1d842ff464 Mon Sep 17 00:00:00 2001 From: Nicolas Schier Date: Wed, 5 Nov 2025 21:26:02 +0100 Subject: scripts: headers_install.sh: Remove two outdated config leak ignore entries Remove config leak ignore entries for arch/arc/include/uapi/asm/page.h as they have been removed in commit d3e5bab923d3 ("arch: simplify architecture specific page size configuration"). Reviewed-by: Nathan Chancellor Link: https://patch.msgid.link/20251105-update-headers-install-config-leak-ignore-list-v1-1-40be3eed68cb@kernel.org Signed-off-by: Nicolas Schier --- scripts/headers_install.sh | 2 -- 1 file changed, 2 deletions(-) (limited to 'scripts') diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh index 4c20c62c4faf..0e4e939efc94 100755 --- a/scripts/headers_install.sh +++ b/scripts/headers_install.sh @@ -70,8 +70,6 @@ configs=$(sed -e ' # # The format is : in each line. config_leak_ignores=" -arch/arc/include/uapi/asm/page.h:CONFIG_ARC_PAGE_SIZE_16K -arch/arc/include/uapi/asm/page.h:CONFIG_ARC_PAGE_SIZE_4K arch/arc/include/uapi/asm/swab.h:CONFIG_ARC_HAS_SWAPE arch/arm/include/uapi/asm/ptrace.h:CONFIG_CPU_ENDIAN_BE8 arch/nios2/include/uapi/asm/swab.h:CONFIG_NIOS2_CI_SWAB_NO -- cgit v1.2.3 From 6401fd334ddf5e2035a0dca27cd761974d568fcd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 17 Oct 2025 21:31:04 -0700 Subject: lib/crypto: tests: Add KUnit tests for BLAKE2b Add a KUnit test suite for the BLAKE2b library API, mirroring the BLAKE2s test suite very closely. As with the BLAKE2s test suite, a benchmark is included. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251018043106.375964-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/Kconfig | 9 + lib/crypto/tests/Makefile | 1 + lib/crypto/tests/blake2b-testvecs.h | 342 ++++++++++++++++++++++++++++++++++++ lib/crypto/tests/blake2b_kunit.c | 133 ++++++++++++++ scripts/crypto/gen-hash-testvecs.py | 29 +-- 5 files changed, 501 insertions(+), 13 deletions(-) create mode 100644 lib/crypto/tests/blake2b-testvecs.h create mode 100644 lib/crypto/tests/blake2b_kunit.c (limited to 'scripts') diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index 578af717e13a..2ebfd681bae4 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -1,5 +1,14 @@ # SPDX-License-Identifier: GPL-2.0-or-later +config CRYPTO_LIB_BLAKE2B_KUNIT_TEST + tristate "KUnit tests for BLAKE2b" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS + select CRYPTO_LIB_BENCHMARK_VISIBLE + select CRYPTO_LIB_BLAKE2B + help + KUnit tests for the BLAKE2b cryptographic hash function. + config CRYPTO_LIB_BLAKE2S_KUNIT_TEST tristate "KUnit tests for BLAKE2s" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile index a71fad19922b..f21a48a4415d 100644 --- a/lib/crypto/tests/Makefile +++ b/lib/crypto/tests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_CRYPTO_LIB_BLAKE2B_KUNIT_TEST) += blake2b_kunit.o obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST) += blake2s_kunit.o obj-$(CONFIG_CRYPTO_LIB_CURVE25519_KUNIT_TEST) += curve25519_kunit.o obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o diff --git a/lib/crypto/tests/blake2b-testvecs.h b/lib/crypto/tests/blake2b-testvecs.h new file mode 100644 index 000000000000..9e407dbc219c --- /dev/null +++ b/lib/crypto/tests/blake2b-testvecs.h @@ -0,0 +1,342 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py blake2b */ + +static const struct { + size_t data_len; + u8 digest[BLAKE2B_HASH_SIZE]; +} hash_testvecs[] = { + { + .data_len = 0, + .digest = { + 0x78, 0x6a, 0x02, 0xf7, 0x42, 0x01, 0x59, 0x03, + 0xc6, 0xc6, 0xfd, 0x85, 0x25, 0x52, 0xd2, 0x72, + 0x91, 0x2f, 0x47, 0x40, 0xe1, 0x58, 0x47, 0x61, + 0x8a, 0x86, 0xe2, 0x17, 0xf7, 0x1f, 0x54, 0x19, + 0xd2, 0x5e, 0x10, 0x31, 0xaf, 0xee, 0x58, 0x53, + 0x13, 0x89, 0x64, 0x44, 0x93, 0x4e, 0xb0, 0x4b, + 0x90, 0x3a, 0x68, 0x5b, 0x14, 0x48, 0xb7, 0x55, + 0xd5, 0x6f, 0x70, 0x1a, 0xfe, 0x9b, 0xe2, 0xce, + }, + }, + { + .data_len = 1, + .digest = { + 0x6f, 0x2e, 0xcc, 0x83, 0x53, 0xa3, 0x20, 0x16, + 0x5b, 0xda, 0xd0, 0x04, 0xd3, 0xcb, 0xe4, 0x37, + 0x5b, 0xf0, 0x84, 0x36, 0xe1, 0xad, 0x45, 0xcc, + 0x4d, 0x7f, 0x09, 0x68, 0xb2, 0x62, 0x93, 0x7f, + 0x72, 0x32, 0xe8, 0xa7, 0x2f, 0x1f, 0x6f, 0xc6, + 0x14, 0xd6, 0x70, 0xae, 0x0c, 0xf0, 0xf3, 0xce, + 0x64, 0x4d, 0x22, 0xdf, 0xc7, 0xa7, 0xf8, 0xa8, + 0x18, 0x23, 0xd8, 0x6c, 0xaf, 0x65, 0xa2, 0x54, + }, + }, + { + .data_len = 2, + .digest = { + 0x04, 0x13, 0xe2, 0x10, 0xbe, 0x65, 0xde, 0xce, + 0x61, 0xa8, 0xe0, 0xd6, 0x35, 0xb1, 0xb8, 0x88, + 0xd2, 0xea, 0x45, 0x3a, 0xe1, 0x8d, 0x94, 0xb5, + 0x66, 0x06, 0x98, 0x96, 0x39, 0xf8, 0x0e, 0xcb, + 0x34, 0xa6, 0xa8, 0x17, 0xfe, 0x56, 0xbc, 0xa9, + 0x5e, 0x1b, 0xb1, 0xde, 0x3c, 0xc7, 0x78, 0x4f, + 0x39, 0xc6, 0xfc, 0xa8, 0xb3, 0x27, 0x66, 0x3e, + 0x4e, 0xb5, 0x5d, 0x08, 0x89, 0xee, 0xd1, 0xe0, + }, + }, + { + .data_len = 3, + .digest = { + 0x2b, 0x4a, 0xa3, 0x4e, 0x2b, 0x7a, 0x47, 0x20, + 0x30, 0x5b, 0x09, 0x17, 0x3a, 0xf4, 0xcc, 0xf0, + 0xf7, 0x7b, 0x97, 0x68, 0x98, 0x9f, 0x4f, 0x09, + 0x46, 0x25, 0xe7, 0xd6, 0x53, 0x6b, 0xf9, 0x68, + 0x48, 0x12, 0x44, 0x8c, 0x9a, 0xc8, 0xd4, 0x42, + 0xeb, 0x2c, 0x5f, 0x41, 0xba, 0x17, 0xd0, 0xc3, + 0xad, 0xfd, 0xfb, 0x42, 0x33, 0xcb, 0x08, 0x5d, + 0xd2, 0x5c, 0x3d, 0xde, 0x87, 0x4d, 0xd6, 0xe4, + }, + }, + { + .data_len = 16, + .digest = { + 0xbf, 0x40, 0xf2, 0x38, 0x44, 0x8e, 0x24, 0x5e, + 0xbc, 0x67, 0xbb, 0xf0, 0x10, 0x9a, 0x79, 0xbb, + 0x36, 0x55, 0xce, 0xd2, 0xba, 0x04, 0x0d, 0xe8, + 0x30, 0x29, 0x5c, 0x2a, 0xa6, 0x3a, 0x4f, 0x37, + 0xac, 0x5f, 0xd4, 0x13, 0xa2, 0xf4, 0xfe, 0x80, + 0x61, 0xd7, 0x58, 0x66, 0x0c, 0x7f, 0xa2, 0x56, + 0x6b, 0x52, 0x7c, 0x22, 0x73, 0x7f, 0x17, 0xaa, + 0x91, 0x5a, 0x22, 0x06, 0xd9, 0x00, 0x48, 0x12, + }, + }, + { + .data_len = 32, + .digest = { + 0x41, 0x04, 0x65, 0x93, 0x81, 0x9a, 0x20, 0x0a, + 0x00, 0x60, 0x00, 0x64, 0x4c, 0x04, 0x3d, 0xe0, + 0x6b, 0x17, 0x0c, 0xe1, 0x0e, 0x28, 0x8b, 0xa0, + 0x76, 0xd2, 0x79, 0xb0, 0x33, 0x60, 0x61, 0x27, + 0xf2, 0x64, 0xf1, 0x8a, 0xe5, 0x3e, 0xaa, 0x37, + 0x60, 0xad, 0x2d, 0x75, 0x13, 0xae, 0xd8, 0x9e, + 0xec, 0xe0, 0xe4, 0x40, 0x2f, 0x59, 0x44, 0xb0, + 0x66, 0x7a, 0x68, 0x38, 0xce, 0x21, 0x99, 0x2a, + }, + }, + { + .data_len = 48, + .digest = { + 0x19, 0x6f, 0x9d, 0xc7, 0x87, 0x12, 0x5c, 0xa3, + 0xe2, 0xd3, 0xf1, 0x82, 0xec, 0xf3, 0x55, 0x9c, + 0x86, 0xd1, 0x6d, 0xde, 0xcf, 0x5b, 0xec, 0x4c, + 0x43, 0x25, 0x85, 0x90, 0xef, 0xe8, 0xe3, 0x5f, + 0x2c, 0x3a, 0x84, 0x07, 0xb8, 0x55, 0xfd, 0x5e, + 0xa4, 0x45, 0xf2, 0xac, 0xe4, 0xbd, 0xc7, 0x96, + 0x80, 0x59, 0x3e, 0xc9, 0xb1, 0x60, 0xb1, 0x2b, + 0x17, 0x49, 0x7d, 0x3e, 0x7d, 0x4d, 0x70, 0x24, + }, + }, + { + .data_len = 49, + .digest = { + 0x73, 0x72, 0xd5, 0x0a, 0x97, 0xb4, 0x7d, 0xdb, + 0x05, 0x14, 0x8e, 0x40, 0xc2, 0x9a, 0x8a, 0x74, + 0x4b, 0xda, 0x7e, 0xfc, 0x97, 0x57, 0x23, 0x39, + 0xdc, 0x57, 0x09, 0x13, 0x24, 0xfc, 0xf3, 0x23, + 0x55, 0x48, 0xdd, 0xe5, 0x07, 0x9a, 0x6f, 0x7b, + 0x62, 0xea, 0x4d, 0x79, 0xb4, 0xb9, 0xc5, 0x86, + 0xc0, 0x34, 0xd6, 0xd2, 0x6c, 0xc3, 0x94, 0xfb, + 0x34, 0xd6, 0x62, 0xae, 0xb8, 0x99, 0xf1, 0x38, + }, + }, + { + .data_len = 63, + .digest = { + 0x42, 0x3a, 0xe3, 0xa2, 0xae, 0x5a, 0x28, 0xce, + 0xf1, 0x3c, 0x97, 0xc2, 0x34, 0xf6, 0xb5, 0x1e, + 0xfc, 0x31, 0xb4, 0x04, 0x61, 0xb7, 0x54, 0x0b, + 0x0d, 0x1a, 0x22, 0x9c, 0x04, 0x67, 0x5c, 0x4c, + 0x75, 0x1b, 0x10, 0x0b, 0x99, 0xe2, 0xb1, 0x5e, + 0x5d, 0x4b, 0x7a, 0xe6, 0xf6, 0xb5, 0x62, 0xee, + 0x2d, 0x44, 0x57, 0xb2, 0x96, 0x73, 0x5e, 0xb9, + 0x6a, 0xb2, 0xb3, 0x16, 0xa3, 0xd9, 0x6a, 0x60, + }, + }, + { + .data_len = 64, + .digest = { + 0x50, 0xb9, 0xbe, 0xb2, 0x69, 0x07, 0x45, 0x5b, + 0x59, 0xde, 0x8d, 0xbf, 0x08, 0xdc, 0x2e, 0x7f, + 0x93, 0x29, 0xc1, 0x91, 0xe8, 0x74, 0x03, 0x89, + 0x20, 0xfb, 0xb2, 0x4b, 0xe8, 0x68, 0x6f, 0xe1, + 0xb4, 0x30, 0xbe, 0x11, 0x3c, 0x43, 0x19, 0x66, + 0x72, 0x78, 0xb7, 0xf4, 0xe9, 0x09, 0x18, 0x4e, + 0xae, 0x4a, 0x24, 0xe0, 0x6f, 0x44, 0x02, 0xe3, + 0xfd, 0xda, 0xb3, 0x3e, 0x3c, 0x6d, 0x54, 0x2e, + }, + }, + { + .data_len = 65, + .digest = { + 0xd6, 0xf2, 0xa9, 0x61, 0x3f, 0xce, 0x2a, 0x68, + 0x19, 0x86, 0xff, 0xd1, 0xee, 0x89, 0x3b, 0xa4, + 0x10, 0x9a, 0x91, 0x50, 0x35, 0x48, 0x9e, 0xf5, + 0x9c, 0x95, 0xe0, 0xfb, 0x92, 0x0f, 0xa8, 0xf7, + 0x6c, 0x43, 0x85, 0xf1, 0x6e, 0x11, 0x4e, 0x67, + 0x78, 0xd7, 0x53, 0x25, 0x0c, 0xf8, 0xce, 0x38, + 0x74, 0x08, 0xb0, 0x3c, 0x53, 0x20, 0x4d, 0xc4, + 0x9a, 0xf5, 0x78, 0xe8, 0x41, 0x8f, 0xed, 0x1f, + }, + }, + { + .data_len = 127, + .digest = { + 0xe8, 0xb2, 0xc5, 0xa7, 0xf5, 0xfa, 0xee, 0xa0, + 0x57, 0xba, 0x58, 0xf9, 0x0a, 0xf2, 0x64, 0x16, + 0xa8, 0xa6, 0x03, 0x85, 0x3b, 0xb8, 0x6f, 0xca, + 0x76, 0xc3, 0xa1, 0x2b, 0xec, 0xef, 0xc4, 0x66, + 0x11, 0xdf, 0x03, 0x85, 0x9d, 0x0c, 0x37, 0x7b, + 0xa9, 0x7b, 0x44, 0xfb, 0x11, 0x8f, 0x3f, 0x71, + 0xcd, 0x81, 0x43, 0x2e, 0x71, 0x5c, 0x54, 0x9f, + 0xca, 0x0f, 0x01, 0x91, 0xca, 0xaa, 0x93, 0xe9, + }, + }, + { + .data_len = 128, + .digest = { + 0x05, 0x8e, 0x9d, 0xdc, 0xe9, 0x36, 0x3e, 0x73, + 0x63, 0x59, 0x69, 0x81, 0x0b, 0x8c, 0xc7, 0x9e, + 0xcc, 0xe7, 0x9c, 0x19, 0x54, 0xa7, 0x2f, 0x86, + 0xb5, 0xea, 0xae, 0x6d, 0xfe, 0x4e, 0x6e, 0x83, + 0x8d, 0x1a, 0x1c, 0x70, 0x3f, 0x34, 0xa1, 0x04, + 0x59, 0xd1, 0xbb, 0xaa, 0x58, 0xf7, 0xce, 0xfb, + 0x86, 0x66, 0x22, 0xfc, 0x78, 0x74, 0x6e, 0x85, + 0xf1, 0x59, 0x7d, 0x9e, 0x1c, 0x3b, 0xc6, 0x65, + }, + }, + { + .data_len = 129, + .digest = { + 0x6b, 0x1f, 0x7c, 0x9a, 0x65, 0x7f, 0x09, 0x61, + 0xe5, 0x04, 0x9a, 0xf1, 0x4b, 0x36, 0x8e, 0x41, + 0x86, 0xcf, 0x86, 0x19, 0xd8, 0xc9, 0x34, 0x70, + 0x67, 0xd1, 0x03, 0x72, 0x12, 0xf7, 0x27, 0x92, + 0x2e, 0x3d, 0x2b, 0x54, 0x9a, 0x48, 0xa4, 0xc2, + 0x61, 0xea, 0x6a, 0xe8, 0xdd, 0x07, 0x41, 0x85, + 0x58, 0x6d, 0xcd, 0x12, 0x0d, 0xbc, 0xb1, 0x23, + 0xb2, 0xdb, 0x24, 0x1f, 0xc4, 0xa7, 0xae, 0xda, + }, + }, + { + .data_len = 256, + .digest = { + 0x50, 0xd8, 0xdc, 0xb2, 0x50, 0x24, 0x7a, 0x49, + 0xb1, 0x00, 0x73, 0x16, 0x1f, 0xce, 0xf9, 0xe8, + 0x77, 0x0a, 0x27, 0x74, 0xc7, 0xeb, 0xf0, 0x62, + 0xb9, 0xf3, 0x24, 0xa6, 0x03, 0x18, 0x40, 0xde, + 0x9b, 0x1d, 0xa8, 0xd0, 0xbf, 0x66, 0xa3, 0xc1, + 0x31, 0x04, 0x95, 0xc7, 0xc3, 0xb7, 0x11, 0xe2, + 0x1e, 0x31, 0x49, 0x98, 0x06, 0xab, 0xf0, 0xe6, + 0x5c, 0xac, 0x88, 0x28, 0x0b, 0x3d, 0xb2, 0xc2, + }, + }, + { + .data_len = 511, + .digest = { + 0xd4, 0x2b, 0x6b, 0x9e, 0xfc, 0x44, 0xc0, 0x90, + 0x64, 0x77, 0x5d, 0xf3, 0x44, 0xb6, 0x92, 0x8f, + 0x80, 0xe2, 0xe4, 0x9b, 0xaf, 0x49, 0x04, 0xea, + 0x29, 0xf7, 0x4a, 0x33, 0x3f, 0xc7, 0x3b, 0xab, + 0xa1, 0x71, 0x7f, 0xa2, 0x8e, 0x03, 0xa0, 0xd6, + 0xa7, 0xcd, 0xe0, 0xf8, 0xd7, 0x3b, 0xa4, 0x0d, + 0x84, 0x79, 0x12, 0x72, 0x3f, 0x8e, 0x48, 0x35, + 0x76, 0x4f, 0x56, 0xe9, 0x21, 0x40, 0x19, 0xbe, + }, + }, + { + .data_len = 513, + .digest = { + 0x84, 0xd4, 0xd8, 0x6c, 0x60, 0x3d, 0x6e, 0xfd, + 0x84, 0xb7, 0xdf, 0xba, 0x13, 0x5e, 0x07, 0x94, + 0x5b, 0x6b, 0x62, 0x1d, 0x82, 0x02, 0xa7, 0xb3, + 0x21, 0xdf, 0x42, 0x20, 0x85, 0xa8, 0x6f, 0x30, + 0xf7, 0x03, 0xba, 0x66, 0x0e, 0xa6, 0x42, 0x21, + 0x37, 0xe8, 0xed, 0x5b, 0x22, 0xf5, 0x4e, 0xa5, + 0xe5, 0x80, 0x1b, 0x47, 0xf0, 0x49, 0xb3, 0xe5, + 0x6e, 0xd9, 0xd9, 0x95, 0x3d, 0x2e, 0x42, 0x13, + }, + }, + { + .data_len = 1000, + .digest = { + 0x71, 0x17, 0xab, 0x93, 0xfe, 0x3b, 0xa4, 0xe6, + 0xcb, 0xb0, 0xea, 0x95, 0xe7, 0x1a, 0x01, 0xc0, + 0x12, 0x33, 0xfe, 0xcc, 0x79, 0x15, 0xae, 0x56, + 0xd2, 0x70, 0x44, 0x60, 0x54, 0x42, 0xa8, 0x69, + 0x7e, 0xc3, 0x90, 0xa0, 0x0c, 0x63, 0x39, 0xff, + 0x55, 0x53, 0xb8, 0x46, 0xef, 0x06, 0xcb, 0xba, + 0x73, 0xf4, 0x76, 0x22, 0xf1, 0x60, 0x98, 0xbc, + 0xbf, 0x76, 0x95, 0x85, 0x13, 0x1d, 0x11, 0x3b, + }, + }, + { + .data_len = 3333, + .digest = { + 0x3a, 0xaa, 0x85, 0xa0, 0x8c, 0x8e, 0xe1, 0x9c, + 0x9b, 0x43, 0x72, 0x7f, 0x40, 0x88, 0x3b, 0xd1, + 0xc4, 0xd8, 0x2b, 0x69, 0xa6, 0x74, 0x47, 0x69, + 0x5f, 0x7d, 0xab, 0x75, 0xa9, 0xf9, 0x88, 0x54, + 0xce, 0x57, 0xcc, 0x9d, 0xac, 0x13, 0x91, 0xdb, + 0x6d, 0x5c, 0xd8, 0xf4, 0x35, 0xc9, 0x30, 0xf0, + 0x4b, 0x91, 0x25, 0xab, 0x92, 0xa8, 0xc8, 0x6f, + 0xa0, 0xeb, 0x71, 0x56, 0x95, 0xab, 0xfd, 0xd7, + }, + }, + { + .data_len = 4096, + .digest = { + 0xe1, 0xe9, 0xbe, 0x6c, 0x96, 0xe2, 0xe8, 0xa6, + 0x53, 0xcd, 0x79, 0x77, 0x57, 0x51, 0x2f, 0xb2, + 0x9f, 0xfc, 0x09, 0xaa, 0x2c, 0xbc, 0x6c, 0x5f, + 0xb0, 0xf2, 0x12, 0x39, 0x54, 0xd7, 0x27, 0xf8, + 0x33, 0x5d, 0xd4, 0x8a, 0xca, 0xd8, 0x2e, 0xbb, + 0x02, 0x82, 0xca, 0x1b, 0x54, 0xfa, 0xd6, 0xf4, + 0x49, 0x63, 0xfc, 0xc8, 0x73, 0xd4, 0x26, 0x8d, + 0x4f, 0x1c, 0x56, 0xa7, 0xf4, 0x58, 0x6f, 0x51, + }, + }, + { + .data_len = 4128, + .digest = { + 0xf2, 0xf6, 0xe1, 0x16, 0x98, 0x69, 0x74, 0x5f, + 0x6c, 0xc4, 0x9d, 0x34, 0xa2, 0x84, 0x5d, 0x47, + 0xac, 0x39, 0xe0, 0x14, 0x2d, 0x78, 0xfa, 0x27, + 0xd5, 0x18, 0xaf, 0x26, 0x89, 0xa4, 0x69, 0xd3, + 0x56, 0xde, 0xfe, 0x4b, 0x9f, 0x0c, 0x9d, 0x5a, + 0x9a, 0x73, 0x3e, 0x3c, 0x76, 0x4b, 0x96, 0xca, + 0x49, 0xda, 0x05, 0x8c, 0x53, 0xbb, 0x85, 0x89, + 0x60, 0xc7, 0xe0, 0xb3, 0x51, 0x18, 0xd2, 0xd2, + }, + }, + { + .data_len = 4160, + .digest = { + 0xfc, 0x5c, 0xcf, 0xbf, 0x29, 0xe3, 0x01, 0xef, + 0x4b, 0x40, 0x70, 0x01, 0xca, 0x4d, 0x46, 0xce, + 0xa9, 0x95, 0x5d, 0xb4, 0xf1, 0x79, 0x29, 0xdb, + 0xac, 0x32, 0x3d, 0xd9, 0x60, 0x9e, 0x6b, 0xb8, + 0x28, 0x62, 0xb7, 0x4a, 0xbb, 0x33, 0xb9, 0xd0, + 0x83, 0xe0, 0xd7, 0x5a, 0x2d, 0x01, 0x4c, 0x61, + 0x9e, 0x7d, 0x2d, 0x2d, 0x60, 0x29, 0x5e, 0x60, + 0x10, 0xb7, 0x41, 0x00, 0x3f, 0xe5, 0xf7, 0x52, + }, + }, + { + .data_len = 4224, + .digest = { + 0xf8, 0xe5, 0x4b, 0xe5, 0x89, 0xf9, 0x1b, 0x43, + 0xbb, 0x65, 0x3d, 0xa0, 0xb4, 0xdc, 0x04, 0x26, + 0x68, 0x15, 0xae, 0x4d, 0xd6, 0x03, 0xb7, 0x27, + 0x06, 0x8c, 0x2a, 0x82, 0x51, 0x96, 0xbf, 0x83, + 0x38, 0x96, 0x21, 0x8a, 0xd9, 0xf9, 0x4e, 0x38, + 0xc6, 0xb3, 0xbd, 0xfe, 0xd3, 0x49, 0x90, 0xbc, + 0xa1, 0x77, 0xd0, 0xa0, 0x3c, 0x2b, 0x4e, 0x10, + 0x34, 0xc3, 0x17, 0x85, 0x3d, 0xec, 0xa8, 0x05, + }, + }, + { + .data_len = 16384, + .digest = { + 0x38, 0x56, 0xaf, 0x83, 0x68, 0x9c, 0xba, 0xe3, + 0xec, 0x51, 0xf5, 0xf4, 0x93, 0x48, 0x1d, 0xe6, + 0xad, 0xa8, 0x8c, 0x70, 0x2a, 0xd9, 0xaa, 0x43, + 0x04, 0x40, 0x95, 0xc1, 0xe6, 0x8a, 0xf5, 0x01, + 0x6b, 0x79, 0xd9, 0xb4, 0xd0, 0x1d, 0x93, 0x26, + 0xfe, 0xf5, 0x07, 0x57, 0xda, 0x08, 0x0a, 0x82, + 0xc9, 0x17, 0x13, 0x5b, 0x9e, 0x11, 0x96, 0xa5, + 0xd0, 0x92, 0xcd, 0xf1, 0xa3, 0x5b, 0x43, 0x21, + }, + }, +}; + +static const u8 hash_testvec_consolidated[BLAKE2B_HASH_SIZE] = { + 0xa4, 0xf8, 0xf6, 0xa1, 0x36, 0x89, 0xc0, 0x2a, + 0xc3, 0x42, 0x32, 0x71, 0xe5, 0xea, 0x14, 0x77, + 0xf3, 0x99, 0x91, 0x87, 0x49, 0xc2, 0x8d, 0xa5, + 0x2f, 0xed, 0x01, 0x35, 0x39, 0x64, 0x09, 0x25, + 0xe3, 0xa8, 0x50, 0x97, 0x35, 0x8b, 0xf5, 0x19, + 0x1e, 0xd5, 0x9f, 0x03, 0x0b, 0x65, 0x55, 0x0e, + 0xa0, 0xb7, 0xda, 0x18, 0x7b, 0x7f, 0x88, 0x55, + 0x1f, 0xdb, 0x82, 0x6b, 0x98, 0x90, 0x1c, 0xdd, +}; + +static const u8 blake2b_keyed_testvec_consolidated[BLAKE2B_HASH_SIZE] = { + 0x2b, 0x89, 0x36, 0x3a, 0x36, 0xe4, 0x18, 0x38, + 0xc4, 0x5b, 0x5c, 0xa5, 0x9a, 0xed, 0xf2, 0xee, + 0x5a, 0xb6, 0x82, 0x6c, 0x63, 0xf2, 0x29, 0x57, + 0xc7, 0xd5, 0x32, 0x27, 0xba, 0x88, 0xb1, 0xab, + 0xf2, 0x2a, 0xc1, 0xea, 0xf3, 0x91, 0x89, 0x66, + 0x47, 0x1e, 0x5b, 0xc6, 0x98, 0x12, 0xe9, 0x25, + 0xbf, 0x72, 0xd2, 0x3f, 0x88, 0x97, 0x17, 0x51, + 0xed, 0x96, 0xfb, 0xe9, 0xca, 0x52, 0x42, 0xc9, +}; diff --git a/lib/crypto/tests/blake2b_kunit.c b/lib/crypto/tests/blake2b_kunit.c new file mode 100644 index 000000000000..bc0be7da1e76 --- /dev/null +++ b/lib/crypto/tests/blake2b_kunit.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2025 Google LLC + */ +#include +#include "blake2b-testvecs.h" + +/* + * The following are compatibility functions that present BLAKE2b as an unkeyed + * hash function that produces hashes of fixed length BLAKE2B_HASH_SIZE, so that + * hash-test-template.h can be reused to test it. + */ + +static void blake2b_default(const u8 *data, size_t len, + u8 out[BLAKE2B_HASH_SIZE]) +{ + blake2b(NULL, 0, data, len, out, BLAKE2B_HASH_SIZE); +} + +static void blake2b_init_default(struct blake2b_ctx *ctx) +{ + blake2b_init(ctx, BLAKE2B_HASH_SIZE); +} + +/* + * Generate the HASH_KUNIT_CASES using hash-test-template.h. These test BLAKE2b + * with a key length of 0 and a hash length of BLAKE2B_HASH_SIZE. + */ +#define HASH blake2b_default +#define HASH_CTX blake2b_ctx +#define HASH_SIZE BLAKE2B_HASH_SIZE +#define HASH_INIT blake2b_init_default +#define HASH_UPDATE blake2b_update +#define HASH_FINAL blake2b_final +#include "hash-test-template.h" + +/* + * BLAKE2b specific test case which tests all possible combinations of key + * length and hash length. + */ +static void test_blake2b_all_key_and_hash_lens(struct kunit *test) +{ + const size_t data_len = 100; + u8 *data = &test_buf[0]; + u8 *key = data + data_len; + u8 *hash = key + BLAKE2B_KEY_SIZE; + struct blake2b_ctx main_ctx; + u8 main_hash[BLAKE2B_HASH_SIZE]; + + rand_bytes_seeded_from_len(data, data_len); + blake2b_init(&main_ctx, BLAKE2B_HASH_SIZE); + for (int key_len = 0; key_len <= BLAKE2B_KEY_SIZE; key_len++) { + rand_bytes_seeded_from_len(key, key_len); + for (int out_len = 1; out_len <= BLAKE2B_HASH_SIZE; out_len++) { + blake2b(key, key_len, data, data_len, hash, out_len); + blake2b_update(&main_ctx, hash, out_len); + } + } + blake2b_final(&main_ctx, main_hash); + KUNIT_ASSERT_MEMEQ(test, main_hash, blake2b_keyed_testvec_consolidated, + BLAKE2B_HASH_SIZE); +} + +/* + * BLAKE2b specific test case which tests using a guarded buffer for all allowed + * key lengths. Also tests both blake2b() and blake2b_init_key(). + */ +static void test_blake2b_with_guarded_key_buf(struct kunit *test) +{ + const size_t data_len = 100; + + rand_bytes(test_buf, data_len); + for (int key_len = 0; key_len <= BLAKE2B_KEY_SIZE; key_len++) { + u8 key[BLAKE2B_KEY_SIZE]; + u8 *guarded_key = &test_buf[TEST_BUF_LEN - key_len]; + u8 hash1[BLAKE2B_HASH_SIZE]; + u8 hash2[BLAKE2B_HASH_SIZE]; + struct blake2b_ctx ctx; + + rand_bytes(key, key_len); + memcpy(guarded_key, key, key_len); + + blake2b(key, key_len, test_buf, data_len, + hash1, BLAKE2B_HASH_SIZE); + blake2b(guarded_key, key_len, test_buf, data_len, + hash2, BLAKE2B_HASH_SIZE); + KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2B_HASH_SIZE); + + blake2b_init_key(&ctx, BLAKE2B_HASH_SIZE, guarded_key, key_len); + blake2b_update(&ctx, test_buf, data_len); + blake2b_final(&ctx, hash2); + KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2B_HASH_SIZE); + } +} + +/* + * BLAKE2b specific test case which tests using a guarded output buffer for all + * allowed output lengths. + */ +static void test_blake2b_with_guarded_out_buf(struct kunit *test) +{ + const size_t data_len = 100; + + rand_bytes(test_buf, data_len); + for (int out_len = 1; out_len <= BLAKE2B_HASH_SIZE; out_len++) { + u8 hash[BLAKE2B_HASH_SIZE]; + u8 *guarded_hash = &test_buf[TEST_BUF_LEN - out_len]; + + blake2b(NULL, 0, test_buf, data_len, hash, out_len); + blake2b(NULL, 0, test_buf, data_len, guarded_hash, out_len); + KUNIT_ASSERT_MEMEQ(test, hash, guarded_hash, out_len); + } +} + +static struct kunit_case blake2b_test_cases[] = { + HASH_KUNIT_CASES, + KUNIT_CASE(test_blake2b_all_key_and_hash_lens), + KUNIT_CASE(test_blake2b_with_guarded_key_buf), + KUNIT_CASE(test_blake2b_with_guarded_out_buf), + KUNIT_CASE(benchmark_hash), + {}, +}; + +static struct kunit_suite blake2b_test_suite = { + .name = "blake2b", + .test_cases = blake2b_test_cases, + .suite_init = hash_suite_init, + .suite_exit = hash_suite_exit, +}; +kunit_test_suite(blake2b_test_suite); + +MODULE_DESCRIPTION("KUnit tests and benchmark for BLAKE2b"); +MODULE_LICENSE("GPL"); diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py index fc063f2ee95f..c5b7985fe728 100755 --- a/scripts/crypto/gen-hash-testvecs.py +++ b/scripts/crypto/gen-hash-testvecs.py @@ -85,8 +85,8 @@ def print_c_struct_u8_array_field(name, value): print('\t\t},') def alg_digest_size_const(alg): - if alg == 'blake2s': - return 'BLAKE2S_HASH_SIZE' + if alg.startswith('blake2'): + return f'{alg.upper()}_HASH_SIZE' return f'{alg.upper()}_DIGEST_SIZE' def gen_unkeyed_testvecs(alg): @@ -124,19 +124,22 @@ def gen_hmac_testvecs(alg): f'hmac_testvec_consolidated[{alg.upper()}_DIGEST_SIZE]', ctx.digest()) -BLAKE2S_KEY_SIZE = 32 -BLAKE2S_HASH_SIZE = 32 - -def gen_additional_blake2s_testvecs(): +def gen_additional_blake2_testvecs(alg): + if alg == 'blake2s': + (max_key_size, max_hash_size) = (32, 32) + elif alg == 'blake2b': + (max_key_size, max_hash_size) = (64, 64) + else: + raise ValueError(f'Unsupported alg: {alg}') hashes = b'' - for key_len in range(BLAKE2S_KEY_SIZE + 1): - for out_len in range(1, BLAKE2S_HASH_SIZE + 1): - h = hashlib.blake2s(digest_size=out_len, key=rand_bytes(key_len)) + for key_len in range(max_key_size + 1): + for out_len in range(1, max_hash_size + 1): + h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len)) h.update(rand_bytes(100)) hashes += h.digest() print_static_u8_array_definition( - 'blake2s_keyed_testvec_consolidated[BLAKE2S_HASH_SIZE]', - compute_hash('blake2s', hashes)) + f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]', + compute_hash(alg, hashes)) def gen_additional_poly1305_testvecs(): key = b'\xff' * POLY1305_KEY_SIZE @@ -160,8 +163,8 @@ alg = sys.argv[1] print('/* SPDX-License-Identifier: GPL-2.0-or-later */') print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */') gen_unkeyed_testvecs(alg) -if alg == 'blake2s': - gen_additional_blake2s_testvecs() +if alg.startswith('blake2'): + gen_additional_blake2_testvecs(alg) elif alg == 'poly1305': gen_additional_poly1305_testvecs() else: -- cgit v1.2.3 From 15c64c47e48472875c2b85838581843f05057787 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 25 Oct 2025 22:50:22 -0700 Subject: lib/crypto: tests: Add SHA3 kunit tests Add a SHA3 kunit test suite, providing the following: (*) A simple test of each of SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128 and SHAKE256. (*) NIST 0- and 1600-bit test vectors for SHAKE128 and SHAKE256. (*) Output tiling (multiple squeezing) tests for SHAKE256. (*) Standard hash template test for SHA3-256. To make this possible, gen-hash-testvecs.py is modified to support sha3-256. (*) Standard benchmark test for SHA3-256. [EB: dropped some unnecessary changes to gen-hash-testvecs.py, moved addition of Testing section in doc file into this commit, and other small cleanups] Signed-off-by: David Howells Reviewed-by: Ard Biesheuvel Tested-by: Harald Freudenberger Link: https://lore.kernel.org/r/20251026055032.1413733-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/crypto/sha3.rst | 11 ++ lib/crypto/tests/Kconfig | 11 ++ lib/crypto/tests/Makefile | 1 + lib/crypto/tests/sha3-testvecs.h | 231 ++++++++++++++++++++++++ lib/crypto/tests/sha3_kunit.c | 344 ++++++++++++++++++++++++++++++++++++ scripts/crypto/gen-hash-testvecs.py | 4 +- 6 files changed, 601 insertions(+), 1 deletion(-) create mode 100644 lib/crypto/tests/sha3-testvecs.h create mode 100644 lib/crypto/tests/sha3_kunit.c (limited to 'scripts') diff --git a/Documentation/crypto/sha3.rst b/Documentation/crypto/sha3.rst index b705e70691d7..37640f295118 100644 --- a/Documentation/crypto/sha3.rst +++ b/Documentation/crypto/sha3.rst @@ -107,6 +107,17 @@ Once all the desired output has been extracted, zeroize the context:: void shake_zeroize_ctx(struct shake_ctx *ctx); +Testing +======= + +To test the SHA-3 code, use sha3_kunit (CONFIG_CRYPTO_LIB_SHA3_KUNIT_TEST). + +Since the SHA-3 algorithms are FIPS-approved, when the kernel is booted in FIPS +mode the SHA-3 library also performs a simple self-test. This is purely to meet +a FIPS requirement. Normal testing done by kernel developers and integrators +should use the much more comprehensive KUnit test suite instead. + + References ========== diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index 2ebfd681bae4..140afd1714ba 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -81,6 +81,17 @@ config CRYPTO_LIB_SHA512_KUNIT_TEST KUnit tests for the SHA-384 and SHA-512 cryptographic hash functions and their corresponding HMACs. +config CRYPTO_LIB_SHA3_KUNIT_TEST + tristate "KUnit tests for SHA-3" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS + select CRYPTO_LIB_BENCHMARK_VISIBLE + select CRYPTO_LIB_SHA3 + help + KUnit tests for the SHA3 cryptographic hash and XOF functions, + including SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128 and + SHAKE256. + config CRYPTO_LIB_BENCHMARK_VISIBLE bool diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile index f21a48a4415d..f7d1392dc847 100644 --- a/lib/crypto/tests/Makefile +++ b/lib/crypto/tests/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA256_KUNIT_TEST) += sha224_kunit.o sha256_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA512_KUNIT_TEST) += sha384_kunit.o sha512_kunit.o +obj-$(CONFIG_CRYPTO_LIB_SHA3_KUNIT_TEST) += sha3_kunit.o diff --git a/lib/crypto/tests/sha3-testvecs.h b/lib/crypto/tests/sha3-testvecs.h new file mode 100644 index 000000000000..9c4c403cc6e0 --- /dev/null +++ b/lib/crypto/tests/sha3-testvecs.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py sha3-256 */ + +static const struct { + size_t data_len; + u8 digest[SHA3_256_DIGEST_SIZE]; +} hash_testvecs[] = { + { + .data_len = 0, + .digest = { + 0xa7, 0xff, 0xc6, 0xf8, 0xbf, 0x1e, 0xd7, 0x66, + 0x51, 0xc1, 0x47, 0x56, 0xa0, 0x61, 0xd6, 0x62, + 0xf5, 0x80, 0xff, 0x4d, 0xe4, 0x3b, 0x49, 0xfa, + 0x82, 0xd8, 0x0a, 0x4b, 0x80, 0xf8, 0x43, 0x4a, + }, + }, + { + .data_len = 1, + .digest = { + 0x11, 0x03, 0xe7, 0x84, 0x51, 0x50, 0x86, 0x35, + 0x71, 0x8a, 0x70, 0xe3, 0xc4, 0x26, 0x7b, 0x21, + 0x02, 0x13, 0xa0, 0x81, 0xe8, 0xe6, 0x14, 0x25, + 0x07, 0x34, 0xe5, 0xc5, 0x40, 0x06, 0xf2, 0x8b, + }, + }, + { + .data_len = 2, + .digest = { + 0x2f, 0x6f, 0x6d, 0x47, 0x48, 0x52, 0x11, 0xb9, + 0xe4, 0x3d, 0xc8, 0x71, 0xcf, 0xb2, 0xee, 0xae, + 0x5b, 0xf4, 0x12, 0x84, 0x5b, 0x1c, 0xec, 0x6c, + 0xc1, 0x66, 0x88, 0xaa, 0xc3, 0x40, 0xbd, 0x7e, + }, + }, + { + .data_len = 3, + .digest = { + 0xec, 0x02, 0xe8, 0x81, 0x4f, 0x84, 0x41, 0x69, + 0x06, 0xd8, 0xdc, 0x1d, 0x01, 0x78, 0xd7, 0xcb, + 0x39, 0xdf, 0xd3, 0x12, 0x1c, 0x99, 0xfd, 0xf3, + 0x5c, 0x83, 0xc9, 0xc2, 0x7a, 0x7b, 0x6a, 0x05, + }, + }, + { + .data_len = 16, + .digest = { + 0xff, 0x6f, 0xc3, 0x41, 0xc3, 0x5f, 0x34, 0x6d, + 0xa7, 0xdf, 0x3e, 0xc2, 0x8b, 0x29, 0xb6, 0xf1, + 0xf8, 0x67, 0xfd, 0xcd, 0xb1, 0x9f, 0x38, 0x08, + 0x1d, 0x8d, 0xd9, 0xc2, 0x43, 0x66, 0x18, 0x6c, + }, + }, + { + .data_len = 32, + .digest = { + 0xe4, 0xb1, 0x06, 0x17, 0xf8, 0x8b, 0x91, 0x95, + 0xe7, 0x57, 0x66, 0xac, 0x08, 0xb2, 0x03, 0x3e, + 0xf7, 0x84, 0x1f, 0xe3, 0x25, 0xa3, 0x11, 0xd2, + 0x11, 0xa4, 0x78, 0x74, 0x2a, 0x43, 0x20, 0xa5, + }, + }, + { + .data_len = 48, + .digest = { + 0xeb, 0x57, 0x5f, 0x20, 0xa3, 0x6b, 0xc7, 0xb4, + 0x66, 0x2a, 0xa0, 0x30, 0x3b, 0x52, 0x00, 0xc9, + 0xce, 0x6a, 0xd8, 0x1e, 0xbe, 0xed, 0xa1, 0xd1, + 0xbe, 0x63, 0xc7, 0xe1, 0xe2, 0x66, 0x67, 0x0c, + }, + }, + { + .data_len = 49, + .digest = { + 0xf0, 0x67, 0xad, 0x66, 0xbe, 0xec, 0x5a, 0xfd, + 0x29, 0xd2, 0x4f, 0x1d, 0xb2, 0x24, 0xb8, 0x90, + 0x05, 0x28, 0x0e, 0x66, 0x67, 0x74, 0x2d, 0xee, + 0x66, 0x25, 0x11, 0xd1, 0x76, 0xa2, 0xfc, 0x3a, + }, + }, + { + .data_len = 63, + .digest = { + 0x57, 0x56, 0x21, 0xb3, 0x2d, 0x2d, 0xe1, 0x9d, + 0xbf, 0x2c, 0x82, 0xa8, 0xad, 0x7e, 0x6c, 0x46, + 0xfb, 0x30, 0xeb, 0xce, 0xcf, 0xed, 0x2d, 0x65, + 0xe7, 0xe4, 0x96, 0x69, 0xe0, 0x48, 0xd2, 0xb6, + }, + }, + { + .data_len = 64, + .digest = { + 0x7b, 0xba, 0x67, 0x15, 0xe5, 0x21, 0xc4, 0x69, + 0xd3, 0xef, 0x5c, 0x97, 0x9f, 0x5b, 0xba, 0x9c, + 0xfa, 0x55, 0x64, 0xec, 0xb5, 0x37, 0x53, 0x1b, + 0x3f, 0x4c, 0x0a, 0xed, 0x51, 0x98, 0x2b, 0x52, + }, + }, + { + .data_len = 65, + .digest = { + 0x44, 0xb6, 0x6b, 0x83, 0x09, 0x83, 0x55, 0x83, + 0xde, 0x1f, 0xcc, 0x33, 0xef, 0xdc, 0x05, 0xbb, + 0x3b, 0x63, 0x76, 0x45, 0xe4, 0x8e, 0x14, 0x7a, + 0x2d, 0xae, 0x90, 0xce, 0x68, 0xc3, 0xa4, 0xf2, + }, + }, + { + .data_len = 127, + .digest = { + 0x50, 0x3e, 0x99, 0x4e, 0x28, 0x2b, 0xc9, 0xf4, + 0xf5, 0xeb, 0x2b, 0x16, 0x04, 0x2d, 0xf5, 0xbe, + 0xc0, 0x91, 0x41, 0x2a, 0x8e, 0x69, 0x5e, 0x39, + 0x53, 0x2c, 0xc1, 0x18, 0xa5, 0xeb, 0xd8, 0xda, + }, + }, + { + .data_len = 128, + .digest = { + 0x90, 0x0b, 0xa6, 0x92, 0x84, 0x30, 0xaf, 0xee, + 0x38, 0x59, 0x83, 0x83, 0xe9, 0xfe, 0xab, 0x86, + 0x79, 0x1b, 0xcd, 0xe7, 0x0a, 0x0f, 0x58, 0x53, + 0x36, 0xab, 0x12, 0xe1, 0x5c, 0x97, 0xc1, 0xfb, + }, + }, + { + .data_len = 129, + .digest = { + 0x2b, 0x52, 0x1e, 0x54, 0xbe, 0x38, 0x4c, 0x3e, + 0x73, 0x37, 0x18, 0xf5, 0x25, 0x2c, 0xc8, 0xc7, + 0xda, 0x7e, 0xb6, 0x47, 0x9d, 0xf4, 0x46, 0xce, + 0xfa, 0x80, 0x20, 0x6b, 0xbd, 0xfd, 0x2a, 0xd8, + }, + }, + { + .data_len = 256, + .digest = { + 0x45, 0xf0, 0xf5, 0x9b, 0xd9, 0x91, 0x26, 0xd5, + 0x91, 0x3b, 0xf8, 0x87, 0x8b, 0x34, 0x02, 0x31, + 0x64, 0xab, 0xf4, 0x1c, 0x6e, 0x34, 0x72, 0xdf, + 0x32, 0x6d, 0xe5, 0xd2, 0x67, 0x5e, 0x86, 0x93, + }, + }, + { + .data_len = 511, + .digest = { + 0xb3, 0xaf, 0x71, 0x64, 0xfa, 0xd4, 0xf1, 0x07, + 0x38, 0xef, 0x04, 0x8e, 0x89, 0xf4, 0x02, 0xd2, + 0xa5, 0xaf, 0x3b, 0xf5, 0x67, 0x56, 0xcf, 0xa9, + 0x8e, 0x43, 0xf5, 0xb5, 0xe3, 0x91, 0x8e, 0xe7, + }, + }, + { + .data_len = 513, + .digest = { + 0x51, 0xac, 0x0a, 0x65, 0xb7, 0x96, 0x20, 0xcf, + 0x88, 0xf6, 0x97, 0x35, 0x89, 0x0d, 0x31, 0x0f, + 0xbe, 0x17, 0xbe, 0x62, 0x03, 0x67, 0xc0, 0xee, + 0x4f, 0xc1, 0xe3, 0x7f, 0x6f, 0xab, 0xac, 0xb4, + }, + }, + { + .data_len = 1000, + .digest = { + 0x7e, 0xea, 0xa8, 0xd7, 0xde, 0x20, 0x1b, 0x58, + 0x24, 0xd8, 0x26, 0x40, 0x36, 0x5f, 0x3f, 0xaa, + 0xe5, 0x5a, 0xea, 0x98, 0x58, 0xd4, 0xd6, 0xfc, + 0x20, 0x4c, 0x5c, 0x4f, 0xaf, 0x56, 0xc7, 0xc3, + }, + }, + { + .data_len = 3333, + .digest = { + 0x61, 0xb1, 0xb1, 0x3e, 0x0e, 0x7e, 0x90, 0x3d, + 0x31, 0x54, 0xbd, 0xc9, 0x0d, 0x53, 0x62, 0xf1, + 0xcd, 0x18, 0x80, 0xf9, 0x91, 0x75, 0x41, 0xb3, + 0x51, 0x39, 0x57, 0xa7, 0xa8, 0x1e, 0xfb, 0xc9, + }, + }, + { + .data_len = 4096, + .digest = { + 0xab, 0x29, 0xda, 0x10, 0xc4, 0x11, 0x2d, 0x5c, + 0xd1, 0xce, 0x1c, 0x95, 0xfa, 0xc6, 0xc7, 0xb0, + 0x1b, 0xd1, 0xdc, 0x6f, 0xa0, 0x9d, 0x1b, 0x23, + 0xfb, 0x6e, 0x90, 0x97, 0xd0, 0x75, 0x44, 0x7a, + }, + }, + { + .data_len = 4128, + .digest = { + 0x02, 0x45, 0x95, 0xf4, 0x19, 0xb5, 0x93, 0x29, + 0x90, 0xf2, 0x63, 0x3f, 0x89, 0xe8, 0xa5, 0x31, + 0x76, 0xf2, 0x89, 0x79, 0x66, 0xd3, 0x96, 0xdf, + 0x33, 0xd1, 0xa6, 0x17, 0x73, 0xb1, 0xd0, 0x45, + }, + }, + { + .data_len = 4160, + .digest = { + 0xd1, 0x8e, 0x22, 0xea, 0x44, 0x87, 0x6e, 0x9d, + 0xfb, 0x36, 0x02, 0x20, 0x63, 0xb7, 0x69, 0x45, + 0x25, 0x41, 0x69, 0xe0, 0x9b, 0x87, 0xcf, 0xa3, + 0x51, 0xbb, 0xfc, 0x8d, 0xf7, 0x29, 0xa7, 0xea, + }, + }, + { + .data_len = 4224, + .digest = { + 0x11, 0x86, 0x7d, 0x84, 0xf9, 0x8c, 0x6e, 0xc4, + 0x64, 0x36, 0xc6, 0xf3, 0x42, 0x92, 0x31, 0x2b, + 0x1e, 0x12, 0xe6, 0x4d, 0xbe, 0xfa, 0x77, 0x3f, + 0x89, 0x41, 0x33, 0x58, 0x1c, 0x98, 0x16, 0x0a, + }, + }, + { + .data_len = 16384, + .digest = { + 0xb2, 0xba, 0x0c, 0x8c, 0x9d, 0xbb, 0x1e, 0xb0, + 0x03, 0xb5, 0xdf, 0x4f, 0xf5, 0x35, 0xdb, 0xec, + 0x60, 0xf2, 0x5b, 0xb6, 0xd0, 0x49, 0xd3, 0xed, + 0x55, 0xc0, 0x7a, 0xd7, 0xaf, 0xa1, 0xea, 0x53, + }, + }, +}; + +static const u8 hash_testvec_consolidated[SHA3_256_DIGEST_SIZE] = { + 0x3b, 0x33, 0x67, 0xf8, 0xea, 0x92, 0x78, 0x62, + 0xdd, 0xbe, 0x72, 0x15, 0xbd, 0x6f, 0xfa, 0xe5, + 0x5e, 0xab, 0x9f, 0xb1, 0xe4, 0x23, 0x7c, 0x2c, + 0x80, 0xcf, 0x09, 0x75, 0xf8, 0xe2, 0xfa, 0x30, +}; diff --git a/lib/crypto/tests/sha3_kunit.c b/lib/crypto/tests/sha3_kunit.c new file mode 100644 index 000000000000..c267984c4aff --- /dev/null +++ b/lib/crypto/tests/sha3_kunit.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ +#include +#include "sha3-testvecs.h" + +#define HASH sha3_256 +#define HASH_CTX sha3_ctx +#define HASH_SIZE SHA3_256_DIGEST_SIZE +#define HASH_INIT sha3_256_init +#define HASH_UPDATE sha3_update +#define HASH_FINAL sha3_final +#include "hash-test-template.h" + +/* + * Sample message and the output generated for various algorithms by passing it + * into "openssl sha3-224" etc.. + */ +static const u8 test_sha3_sample[] = + "The quick red fox jumped over the lazy brown dog!\n" + "The quick red fox jumped over the lazy brown dog!\n" + "The quick red fox jumped over the lazy brown dog!\n" + "The quick red fox jumped over the lazy brown dog!\n"; + +static const u8 test_sha3_224[8 + SHA3_224_DIGEST_SIZE + 8] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-before guard */ + 0xd6, 0xe8, 0xd8, 0x80, 0xfa, 0x42, 0x80, 0x70, + 0x7e, 0x7f, 0xd7, 0xd2, 0xd7, 0x7a, 0x35, 0x65, + 0xf0, 0x0b, 0x4f, 0x9f, 0x2a, 0x33, 0xca, 0x0a, + 0xef, 0xa6, 0x4c, 0xb8, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-after guard */ +}; + +static const u8 test_sha3_256[8 + SHA3_256_DIGEST_SIZE + 8] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-before guard */ + 0xdb, 0x3b, 0xb0, 0xb8, 0x8d, 0x15, 0x78, 0xe5, + 0x78, 0x76, 0x8e, 0x39, 0x7e, 0x89, 0x86, 0xb9, + 0x14, 0x3a, 0x1e, 0xe7, 0x96, 0x7c, 0xf3, 0x25, + 0x70, 0xbd, 0xc3, 0xa9, 0xae, 0x63, 0x71, 0x1d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-after guard */ +}; + +static const u8 test_sha3_384[8 + SHA3_384_DIGEST_SIZE + 8] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-before guard */ + 0x2d, 0x4b, 0x29, 0x85, 0x19, 0x94, 0xaa, 0x31, + 0x9b, 0x04, 0x9d, 0x6e, 0x79, 0x66, 0xc7, 0x56, + 0x8a, 0x2e, 0x99, 0x84, 0x06, 0xcf, 0x10, 0x2d, + 0xec, 0xf0, 0x03, 0x04, 0x1f, 0xd5, 0x99, 0x63, + 0x2f, 0xc3, 0x2b, 0x0d, 0xd9, 0x45, 0xf7, 0xbb, + 0x0a, 0xc3, 0x46, 0xab, 0xfe, 0x4d, 0x94, 0xc2, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-after guard */ +}; + +static const u8 test_sha3_512[8 + SHA3_512_DIGEST_SIZE + 8] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-before guard */ + 0xdd, 0x71, 0x3b, 0x44, 0xb6, 0x6c, 0xd7, 0x78, + 0xe7, 0x93, 0xa1, 0x4c, 0xd7, 0x24, 0x16, 0xf1, + 0xfd, 0xa2, 0x82, 0x4e, 0xed, 0x59, 0xe9, 0x83, + 0x15, 0x38, 0x89, 0x7d, 0x39, 0x17, 0x0c, 0xb2, + 0xcf, 0x12, 0x80, 0x78, 0xa1, 0x78, 0x41, 0xeb, + 0xed, 0x21, 0x4c, 0xa4, 0x4a, 0x5f, 0x30, 0x1a, + 0x70, 0x98, 0x4f, 0x14, 0xa2, 0xd1, 0x64, 0x1b, + 0xc2, 0x0a, 0xff, 0x3b, 0xe8, 0x26, 0x41, 0x8f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-after guard */ +}; + +static const u8 test_shake128[8 + SHAKE128_DEFAULT_SIZE + 8] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-before guard */ + 0x41, 0xd6, 0xb8, 0x9c, 0xf8, 0xe8, 0x54, 0xf2, + 0x5c, 0xde, 0x51, 0x12, 0xaf, 0x9e, 0x0d, 0x91, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-after guard */ +}; + +static const u8 test_shake256[8 + SHAKE256_DEFAULT_SIZE + 8] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-before guard */ + 0xab, 0x06, 0xd4, 0xf9, 0x8b, 0xfd, 0xb2, 0xc4, + 0xfe, 0xf1, 0xcc, 0xe2, 0x40, 0x45, 0xdd, 0x15, + 0xcb, 0xdd, 0x02, 0x8d, 0xb7, 0x9f, 0x1e, 0x67, + 0xd6, 0x7f, 0x98, 0x5e, 0x1b, 0x19, 0xf8, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* Write-after guard */ +}; + +static void test_sha3_224_basic(struct kunit *test) +{ + u8 out[8 + SHA3_224_DIGEST_SIZE + 8]; + + BUILD_BUG_ON(sizeof(out) != sizeof(test_sha3_224)); + + memset(out, 0, sizeof(out)); + sha3_224(test_sha3_sample, sizeof(test_sha3_sample) - 1, out + 8); + + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_sha3_224, sizeof(test_sha3_224), + "SHA3-224 gives wrong output"); +} + +static void test_sha3_256_basic(struct kunit *test) +{ + u8 out[8 + SHA3_256_DIGEST_SIZE + 8]; + + BUILD_BUG_ON(sizeof(out) != sizeof(test_sha3_256)); + + memset(out, 0, sizeof(out)); + sha3_256(test_sha3_sample, sizeof(test_sha3_sample) - 1, out + 8); + + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_sha3_256, sizeof(test_sha3_256), + "SHA3-256 gives wrong output"); +} + +static void test_sha3_384_basic(struct kunit *test) +{ + u8 out[8 + SHA3_384_DIGEST_SIZE + 8]; + + BUILD_BUG_ON(sizeof(out) != sizeof(test_sha3_384)); + + memset(out, 0, sizeof(out)); + sha3_384(test_sha3_sample, sizeof(test_sha3_sample) - 1, out + 8); + + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_sha3_384, sizeof(test_sha3_384), + "SHA3-384 gives wrong output"); +} + +static void test_sha3_512_basic(struct kunit *test) +{ + u8 out[8 + SHA3_512_DIGEST_SIZE + 8]; + + BUILD_BUG_ON(sizeof(out) != sizeof(test_sha3_512)); + + memset(out, 0, sizeof(out)); + sha3_512(test_sha3_sample, sizeof(test_sha3_sample) - 1, out + 8); + + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_sha3_512, sizeof(test_sha3_512), + "SHA3-512 gives wrong output"); +} + +static void test_shake128_basic(struct kunit *test) +{ + u8 out[8 + SHAKE128_DEFAULT_SIZE + 8]; + + BUILD_BUG_ON(sizeof(out) != sizeof(test_shake128)); + + memset(out, 0, sizeof(out)); + shake128(test_sha3_sample, sizeof(test_sha3_sample) - 1, + out + 8, sizeof(out) - 16); + + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake128, sizeof(test_shake128), + "SHAKE128 gives wrong output"); +} + +static void test_shake256_basic(struct kunit *test) +{ + u8 out[8 + SHAKE256_DEFAULT_SIZE + 8]; + + BUILD_BUG_ON(sizeof(out) != sizeof(test_shake256)); + + memset(out, 0, sizeof(out)); + shake256(test_sha3_sample, sizeof(test_sha3_sample) - 1, + out + 8, sizeof(out) - 16); + + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake256, sizeof(test_shake256), + "SHAKE256 gives wrong output"); +} + +/* + * Usable NIST tests. + * + * From: https://csrc.nist.gov/projects/cryptographic-standards-and-guidelines/example-values + */ +static const u8 test_nist_1600_sample[] = { + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, + 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3 +}; + +static const u8 test_shake128_nist_0[] = { + 0x7f, 0x9c, 0x2b, 0xa4, 0xe8, 0x8f, 0x82, 0x7d, + 0x61, 0x60, 0x45, 0x50, 0x76, 0x05, 0x85, 0x3e +}; + +static const u8 test_shake128_nist_1600[] = { + 0x13, 0x1a, 0xb8, 0xd2, 0xb5, 0x94, 0x94, 0x6b, + 0x9c, 0x81, 0x33, 0x3f, 0x9b, 0xb6, 0xe0, 0xce, +}; + +static const u8 test_shake256_nist_0[] = { + 0x46, 0xb9, 0xdd, 0x2b, 0x0b, 0xa8, 0x8d, 0x13, + 0x23, 0x3b, 0x3f, 0xeb, 0x74, 0x3e, 0xeb, 0x24, + 0x3f, 0xcd, 0x52, 0xea, 0x62, 0xb8, 0x1b, 0x82, + 0xb5, 0x0c, 0x27, 0x64, 0x6e, 0xd5, 0x76, 0x2f +}; + +static const u8 test_shake256_nist_1600[] = { + 0xcd, 0x8a, 0x92, 0x0e, 0xd1, 0x41, 0xaa, 0x04, + 0x07, 0xa2, 0x2d, 0x59, 0x28, 0x86, 0x52, 0xe9, + 0xd9, 0xf1, 0xa7, 0xee, 0x0c, 0x1e, 0x7c, 0x1c, + 0xa6, 0x99, 0x42, 0x4d, 0xa8, 0x4a, 0x90, 0x4d, +}; + +static void test_shake128_nist(struct kunit *test) +{ + u8 out[SHAKE128_DEFAULT_SIZE]; + + shake128("", 0, out, sizeof(out)); + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake128_nist_0, sizeof(out), + "SHAKE128 gives wrong output for NIST.0"); + + shake128(test_nist_1600_sample, sizeof(test_nist_1600_sample), + out, sizeof(out)); + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake128_nist_1600, sizeof(out), + "SHAKE128 gives wrong output for NIST.1600"); +} + +static void test_shake256_nist(struct kunit *test) +{ + u8 out[SHAKE256_DEFAULT_SIZE]; + + shake256("", 0, out, sizeof(out)); + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake256_nist_0, sizeof(out), + "SHAKE256 gives wrong output for NIST.0"); + + shake256(test_nist_1600_sample, sizeof(test_nist_1600_sample), + out, sizeof(out)); + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake256_nist_1600, sizeof(out), + "SHAKE256 gives wrong output for NIST.1600"); +} + +/* + * Output tiling test of SHAKE256; equal output tiles barring the last. A + * series of squeezings of the same context should, if laid end-to-end, match a + * single squeezing of the combined size. + */ +static void test_shake256_tiling(struct kunit *test) +{ + struct shake_ctx ctx; + u8 out[8 + SHA3_512_DIGEST_SIZE + 8]; + + for (int tile_size = 1; tile_size < SHAKE256_DEFAULT_SIZE; tile_size++) { + int left = SHAKE256_DEFAULT_SIZE; + u8 *p = out + 8; + + memset(out, 0, sizeof(out)); + shake256_init(&ctx); + shake_update(&ctx, test_sha3_sample, + sizeof(test_sha3_sample) - 1); + while (left > 0) { + int part = umin(tile_size, left); + + shake_squeeze(&ctx, p, part); + p += part; + left -= part; + } + + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake256, sizeof(test_shake256), + "SHAKE tile %u gives wrong output", tile_size); + } +} + +/* + * Output tiling test of SHAKE256; output tiles getting gradually smaller and + * then cycling round to medium sized ones. A series of squeezings of the same + * context should, if laid end-to-end, match a single squeezing of the combined + * size. + */ +static void test_shake256_tiling2(struct kunit *test) +{ + struct shake_ctx ctx; + u8 out[8 + SHA3_512_DIGEST_SIZE + 8]; + + for (int first_tile_size = 3; + first_tile_size < SHAKE256_DEFAULT_SIZE; + first_tile_size++) { + int tile_size = first_tile_size; + int left = SHAKE256_DEFAULT_SIZE; + u8 *p = out + 8; + + memset(out, 0, sizeof(out)); + shake256_init(&ctx); + shake_update(&ctx, test_sha3_sample, + sizeof(test_sha3_sample) - 1); + while (left > 0) { + int part = umin(tile_size, left); + + shake_squeeze(&ctx, p, part); + p += part; + left -= part; + tile_size--; + if (tile_size < 1) + tile_size = 5; + } + + KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake256, sizeof(test_shake256), + "SHAKE tile %u gives wrong output", tile_size); + } +} + +static struct kunit_case sha3_test_cases[] = { + HASH_KUNIT_CASES, + KUNIT_CASE(test_sha3_224_basic), + KUNIT_CASE(test_sha3_256_basic), + KUNIT_CASE(test_sha3_384_basic), + KUNIT_CASE(test_sha3_512_basic), + KUNIT_CASE(test_shake128_basic), + KUNIT_CASE(test_shake256_basic), + KUNIT_CASE(test_shake128_nist), + KUNIT_CASE(test_shake256_nist), + KUNIT_CASE(test_shake256_tiling), + KUNIT_CASE(test_shake256_tiling2), + KUNIT_CASE(benchmark_hash), + {}, +}; + +static struct kunit_suite sha3_test_suite = { + .name = "sha3", + .test_cases = sha3_test_cases, + .suite_init = hash_suite_init, + .suite_exit = hash_suite_exit, +}; +kunit_test_suite(sha3_test_suite); + +MODULE_DESCRIPTION("KUnit tests and benchmark for SHA3"); +MODULE_LICENSE("GPL"); diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py index c5b7985fe728..47f79602e290 100755 --- a/scripts/crypto/gen-hash-testvecs.py +++ b/scripts/crypto/gen-hash-testvecs.py @@ -87,7 +87,7 @@ def print_c_struct_u8_array_field(name, value): def alg_digest_size_const(alg): if alg.startswith('blake2'): return f'{alg.upper()}_HASH_SIZE' - return f'{alg.upper()}_DIGEST_SIZE' + return f'{alg.upper().replace('-', '_')}_DIGEST_SIZE' def gen_unkeyed_testvecs(alg): print('') @@ -167,5 +167,7 @@ if alg.startswith('blake2'): gen_additional_blake2_testvecs(alg) elif alg == 'poly1305': gen_additional_poly1305_testvecs() +elif alg.startswith('sha3-'): + pass # no HMAC else: gen_hmac_testvecs(alg) -- cgit v1.2.3 From b2210f35161d6202fcca4244800a1d54c80e8bc1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 25 Oct 2025 22:50:23 -0700 Subject: lib/crypto: tests: Add additional SHAKE tests Add the following test cases to cover gaps in the SHAKE testing: - test_shake_all_lens_up_to_4096() - test_shake_multiple_squeezes() - test_shake_with_guarded_bufs() Remove test_shake256_tiling() and test_shake256_tiling2() since they are superseded by test_shake_multiple_squeezes(). It provides better test coverage by using randomized testing. E.g., it's able to generate a zero-length squeeze followed by a nonzero-length squeeze, which the first 7 versions of the SHA-3 patchset handled incorrectly. Tested-by: Harald Freudenberger Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251026055032.1413733-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/sha3-testvecs.h | 20 +++- lib/crypto/tests/sha3_kunit.c | 186 +++++++++++++++++++++++++----------- scripts/crypto/gen-hash-testvecs.py | 27 +++++- 3 files changed, 174 insertions(+), 59 deletions(-) (limited to 'scripts') diff --git a/lib/crypto/tests/sha3-testvecs.h b/lib/crypto/tests/sha3-testvecs.h index 9c4c403cc6e0..8d614a5fa0c3 100644 --- a/lib/crypto/tests/sha3-testvecs.h +++ b/lib/crypto/tests/sha3-testvecs.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py sha3-256 */ +/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py sha3 */ + +/* SHA3-256 test vectors */ static const struct { size_t data_len; @@ -229,3 +231,19 @@ static const u8 hash_testvec_consolidated[SHA3_256_DIGEST_SIZE] = { 0x5e, 0xab, 0x9f, 0xb1, 0xe4, 0x23, 0x7c, 0x2c, 0x80, 0xcf, 0x09, 0x75, 0xf8, 0xe2, 0xfa, 0x30, }; + +/* SHAKE test vectors */ + +static const u8 shake128_testvec_consolidated[SHA3_256_DIGEST_SIZE] = { + 0x89, 0x88, 0x3a, 0x44, 0xec, 0xfe, 0x3c, 0xeb, + 0x2f, 0x1c, 0x1d, 0xda, 0x9e, 0x36, 0x64, 0xf0, + 0x85, 0x4c, 0x49, 0x12, 0x76, 0x5a, 0x4d, 0xe7, + 0xa8, 0xfd, 0xcd, 0xbe, 0x45, 0xb4, 0x6f, 0xb0, +}; + +static const u8 shake256_testvec_consolidated[SHA3_256_DIGEST_SIZE] = { + 0x5a, 0xfd, 0x66, 0x62, 0x5c, 0x37, 0x2b, 0x41, + 0x77, 0x1c, 0x01, 0x5d, 0x64, 0x7c, 0x63, 0x7a, + 0x7c, 0x76, 0x9e, 0xa8, 0xd1, 0xb0, 0x8e, 0x02, + 0x16, 0x9b, 0xfe, 0x0e, 0xb5, 0xd8, 0x6a, 0xb5, +}; diff --git a/lib/crypto/tests/sha3_kunit.c b/lib/crypto/tests/sha3_kunit.c index c267984c4aff..ed5fbe80337f 100644 --- a/lib/crypto/tests/sha3_kunit.c +++ b/lib/crypto/tests/sha3_kunit.c @@ -247,72 +247,149 @@ static void test_shake256_nist(struct kunit *test) "SHAKE256 gives wrong output for NIST.1600"); } -/* - * Output tiling test of SHAKE256; equal output tiles barring the last. A - * series of squeezings of the same context should, if laid end-to-end, match a - * single squeezing of the combined size. - */ -static void test_shake256_tiling(struct kunit *test) +static void shake(int alg, const u8 *in, size_t in_len, u8 *out, size_t out_len) { - struct shake_ctx ctx; - u8 out[8 + SHA3_512_DIGEST_SIZE + 8]; + if (alg == 0) + shake128(in, in_len, out, out_len); + else + shake256(in, in_len, out, out_len); +} - for (int tile_size = 1; tile_size < SHAKE256_DEFAULT_SIZE; tile_size++) { - int left = SHAKE256_DEFAULT_SIZE; - u8 *p = out + 8; +static void shake_init(struct shake_ctx *ctx, int alg) +{ + if (alg == 0) + shake128_init(ctx); + else + shake256_init(ctx); +} - memset(out, 0, sizeof(out)); - shake256_init(&ctx); - shake_update(&ctx, test_sha3_sample, - sizeof(test_sha3_sample) - 1); - while (left > 0) { - int part = umin(tile_size, left); +/* + * Test each of SHAKE128 and SHAKE256 with all input lengths 0 through 4096, for + * both input and output. The input and output lengths cycle through the values + * together, so we do 4096 tests total. To verify all the SHAKE outputs, + * compute and verify the SHA3-256 digest of all of them concatenated together. + */ +static void test_shake_all_lens_up_to_4096(struct kunit *test) +{ + struct sha3_ctx main_ctx; + const size_t max_len = 4096; + u8 *const in = test_buf; + u8 *const out = &test_buf[TEST_BUF_LEN - max_len]; + u8 main_hash[SHA3_256_DIGEST_SIZE]; + + KUNIT_ASSERT_LE(test, 2 * max_len, TEST_BUF_LEN); + + rand_bytes_seeded_from_len(in, max_len); + for (int alg = 0; alg < 2; alg++) { + sha3_256_init(&main_ctx); + for (size_t in_len = 0; in_len <= max_len; in_len++) { + size_t out_len = (in_len * 293) % (max_len + 1); + + shake(alg, in, in_len, out, out_len); + sha3_update(&main_ctx, out, out_len); + } + sha3_final(&main_ctx, main_hash); + if (alg == 0) + KUNIT_ASSERT_MEMEQ_MSG(test, main_hash, + shake128_testvec_consolidated, + sizeof(main_hash), + "shake128() gives wrong output"); + else + KUNIT_ASSERT_MEMEQ_MSG(test, main_hash, + shake256_testvec_consolidated, + sizeof(main_hash), + "shake256() gives wrong output"); + } +} - shake_squeeze(&ctx, p, part); - p += part; - left -= part; +/* + * Test that a sequence of SHAKE squeezes gives the same output as a single + * squeeze of the same total length. + */ +static void test_shake_multiple_squeezes(struct kunit *test) +{ + const size_t max_len = 512; + u8 *ref_out; + + KUNIT_ASSERT_GE(test, TEST_BUF_LEN, 2 * max_len); + + ref_out = kunit_kzalloc(test, max_len, GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, ref_out); + + for (int i = 0; i < 2000; i++) { + const int alg = rand32() % 2; + const size_t in_len = rand_length(max_len); + const size_t out_len = rand_length(max_len); + const size_t in_offs = rand_offset(max_len - in_len); + const size_t out_offs = rand_offset(max_len - out_len); + u8 *const in = &test_buf[in_offs]; + u8 *const out = &test_buf[out_offs]; + struct shake_ctx ctx; + size_t remaining_len, j, num_parts; + + rand_bytes(in, in_len); + rand_bytes(out, out_len); + + /* Compute the output using the one-shot function. */ + shake(alg, in, in_len, ref_out, out_len); + + /* Compute the output using a random sequence of squeezes. */ + shake_init(&ctx, alg); + shake_update(&ctx, in, in_len); + remaining_len = out_len; + j = 0; + num_parts = 0; + while (rand_bool()) { + size_t part_len = rand_length(remaining_len); + + shake_squeeze(&ctx, &out[j], part_len); + num_parts++; + j += part_len; + remaining_len -= part_len; + } + if (remaining_len != 0 || rand_bool()) { + shake_squeeze(&ctx, &out[j], remaining_len); + num_parts++; } - KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake256, sizeof(test_shake256), - "SHAKE tile %u gives wrong output", tile_size); + /* Verify that the outputs are the same. */ + KUNIT_ASSERT_MEMEQ_MSG( + test, out, ref_out, out_len, + "Multi-squeeze test failed with in_len=%zu in_offs=%zu out_len=%zu out_offs=%zu num_parts=%zu alg=%d", + in_len, in_offs, out_len, out_offs, num_parts, alg); } } /* - * Output tiling test of SHAKE256; output tiles getting gradually smaller and - * then cycling round to medium sized ones. A series of squeezings of the same - * context should, if laid end-to-end, match a single squeezing of the combined - * size. + * Test that SHAKE operations on buffers immediately followed by an unmapped + * page work as expected. This catches out-of-bounds memory accesses even if + * they occur in assembly code. */ -static void test_shake256_tiling2(struct kunit *test) +static void test_shake_with_guarded_bufs(struct kunit *test) { - struct shake_ctx ctx; - u8 out[8 + SHA3_512_DIGEST_SIZE + 8]; + const size_t max_len = 512; + u8 *reg_buf; - for (int first_tile_size = 3; - first_tile_size < SHAKE256_DEFAULT_SIZE; - first_tile_size++) { - int tile_size = first_tile_size; - int left = SHAKE256_DEFAULT_SIZE; - u8 *p = out + 8; - - memset(out, 0, sizeof(out)); - shake256_init(&ctx); - shake_update(&ctx, test_sha3_sample, - sizeof(test_sha3_sample) - 1); - while (left > 0) { - int part = umin(tile_size, left); - - shake_squeeze(&ctx, p, part); - p += part; - left -= part; - tile_size--; - if (tile_size < 1) - tile_size = 5; - } + KUNIT_ASSERT_GE(test, TEST_BUF_LEN, max_len); - KUNIT_ASSERT_MEMEQ_MSG(test, out, test_shake256, sizeof(test_shake256), - "SHAKE tile %u gives wrong output", tile_size); + reg_buf = kunit_kzalloc(test, max_len, GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, reg_buf); + + for (int alg = 0; alg < 2; alg++) { + for (size_t len = 0; len <= max_len; len++) { + u8 *guarded_buf = &test_buf[TEST_BUF_LEN - len]; + + rand_bytes(reg_buf, len); + memcpy(guarded_buf, reg_buf, len); + + shake(alg, reg_buf, len, reg_buf, len); + shake(alg, guarded_buf, len, guarded_buf, len); + + KUNIT_ASSERT_MEMEQ_MSG( + test, reg_buf, guarded_buf, len, + "Guard page test failed with len=%zu alg=%d", + len, alg); + } } } @@ -326,8 +403,9 @@ static struct kunit_case sha3_test_cases[] = { KUNIT_CASE(test_shake256_basic), KUNIT_CASE(test_shake128_nist), KUNIT_CASE(test_shake256_nist), - KUNIT_CASE(test_shake256_tiling), - KUNIT_CASE(test_shake256_tiling2), + KUNIT_CASE(test_shake_all_lens_up_to_4096), + KUNIT_CASE(test_shake_multiple_squeezes), + KUNIT_CASE(test_shake_with_guarded_bufs), KUNIT_CASE(benchmark_hash), {}, }; diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py index 47f79602e290..ae2682882cd1 100755 --- a/scripts/crypto/gen-hash-testvecs.py +++ b/scripts/crypto/gen-hash-testvecs.py @@ -111,6 +111,18 @@ def gen_unkeyed_testvecs(alg): f'hash_testvec_consolidated[{alg_digest_size_const(alg)}]', hash_final(ctx)) +def gen_additional_sha3_testvecs(): + max_len = 4096 + in_data = rand_bytes(max_len) + for alg in ['shake128', 'shake256']: + ctx = hashlib.new('sha3-256') + for in_len in range(max_len + 1): + out_len = (in_len * 293) % (max_len + 1) + out = hashlib.new(alg, data=in_data[:in_len]).digest(out_len) + ctx.update(out) + print_static_u8_array_definition(f'{alg}_testvec_consolidated[SHA3_256_DIGEST_SIZE]', + ctx.digest()) + def gen_hmac_testvecs(alg): ctx = hmac.new(rand_bytes(32), digestmod=alg) data = rand_bytes(4096) @@ -155,19 +167,26 @@ def gen_additional_poly1305_testvecs(): if len(sys.argv) != 2: sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n') - sys.stderr.write('ALGORITHM may be any supported by Python hashlib, or poly1305.\n') + sys.stderr.write('ALGORITHM may be any supported by Python hashlib, or poly1305 or sha3.\n') sys.stderr.write('Example: gen-hash-testvecs.py sha512\n') sys.exit(1) alg = sys.argv[1] print('/* SPDX-License-Identifier: GPL-2.0-or-later */') print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */') -gen_unkeyed_testvecs(alg) if alg.startswith('blake2'): + gen_unkeyed_testvecs(alg) gen_additional_blake2_testvecs(alg) elif alg == 'poly1305': + gen_unkeyed_testvecs(alg) gen_additional_poly1305_testvecs() -elif alg.startswith('sha3-'): - pass # no HMAC +elif alg == 'sha3': + print() + print('/* SHA3-256 test vectors */') + gen_unkeyed_testvecs('sha3-256') + print() + print('/* SHAKE test vectors */') + gen_additional_sha3_testvecs() else: + gen_unkeyed_testvecs(alg) gen_hmac_testvecs(alg) -- cgit v1.2.3 From b3aed551b3fca753469520c95b6f4c61ada028d3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 9 Nov 2025 15:47:18 -0800 Subject: lib/crypto: tests: Add KUnit tests for POLYVAL Add a test suite for the POLYVAL library, including: - All the standard tests and the benchmark from hash-test-template.h - Comparison with a test vector from the RFC - Test with key and message containing all one bits - Additional tests related to the key struct Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251109234726.638437-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/Kconfig | 9 ++ lib/crypto/tests/Makefile | 1 + lib/crypto/tests/polyval-testvecs.h | 186 ++++++++++++++++++++++++++++++ lib/crypto/tests/polyval_kunit.c | 223 ++++++++++++++++++++++++++++++++++++ scripts/crypto/gen-hash-testvecs.py | 47 +++++++- 5 files changed, 464 insertions(+), 2 deletions(-) create mode 100644 lib/crypto/tests/polyval-testvecs.h create mode 100644 lib/crypto/tests/polyval_kunit.c (limited to 'scripts') diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index 140afd1714ba..61d435c450bb 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -47,6 +47,15 @@ config CRYPTO_LIB_POLY1305_KUNIT_TEST help KUnit tests for the Poly1305 library functions. +config CRYPTO_LIB_POLYVAL_KUNIT_TEST + tristate "KUnit tests for POLYVAL" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS + select CRYPTO_LIB_BENCHMARK_VISIBLE + select CRYPTO_LIB_POLYVAL + help + KUnit tests for the POLYVAL library functions. + config CRYPTO_LIB_SHA1_KUNIT_TEST tristate "KUnit tests for SHA-1" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile index f7d1392dc847..5109a0651925 100644 --- a/lib/crypto/tests/Makefile +++ b/lib/crypto/tests/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST) += blake2s_kunit.o obj-$(CONFIG_CRYPTO_LIB_CURVE25519_KUNIT_TEST) += curve25519_kunit.o obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o +obj-$(CONFIG_CRYPTO_LIB_POLYVAL_KUNIT_TEST) += polyval_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA256_KUNIT_TEST) += sha224_kunit.o sha256_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA512_KUNIT_TEST) += sha384_kunit.o sha512_kunit.o diff --git a/lib/crypto/tests/polyval-testvecs.h b/lib/crypto/tests/polyval-testvecs.h new file mode 100644 index 000000000000..3d33f60d58bb --- /dev/null +++ b/lib/crypto/tests/polyval-testvecs.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py polyval */ + +static const struct { + size_t data_len; + u8 digest[POLYVAL_DIGEST_SIZE]; +} hash_testvecs[] = { + { + .data_len = 0, + .digest = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + }, + { + .data_len = 1, + .digest = { + 0xb5, 0x51, 0x69, 0x89, 0xd4, 0x3c, 0x59, 0xca, + 0x6a, 0x1c, 0x2a, 0xe9, 0xa1, 0x9c, 0x6c, 0x83, + }, + }, + { + .data_len = 2, + .digest = { + 0xf4, 0x50, 0xaf, 0x07, 0xda, 0x42, 0xa7, 0x41, + 0x4d, 0x24, 0x88, 0x87, 0xe3, 0x40, 0x73, 0x7c, + }, + }, + { + .data_len = 3, + .digest = { + 0x9e, 0x88, 0x78, 0x71, 0x4c, 0x55, 0x87, 0xe8, + 0xb4, 0x96, 0x3d, 0x56, 0xc8, 0xb2, 0xe1, 0x68, + }, + }, + { + .data_len = 16, + .digest = { + 0x9e, 0x81, 0x37, 0x8f, 0x49, 0xf7, 0xa2, 0xe4, + 0x04, 0x45, 0x12, 0x78, 0x45, 0x42, 0x27, 0xad, + }, + }, + { + .data_len = 32, + .digest = { + 0x60, 0x19, 0xd0, 0xa4, 0xf0, 0xde, 0x9e, 0xe7, + 0x6a, 0x89, 0x1a, 0xea, 0x80, 0x14, 0xa9, 0xa3, + }, + }, + { + .data_len = 48, + .digest = { + 0x0c, 0xa2, 0x70, 0x4d, 0x7c, 0x89, 0xac, 0x41, + 0xc2, 0x9e, 0x0d, 0x07, 0x07, 0x6a, 0x7f, 0xd5, + }, + }, + { + .data_len = 49, + .digest = { + 0x91, 0xd3, 0xa9, 0x5c, 0x79, 0x3d, 0x6b, 0x84, + 0x99, 0x54, 0xa7, 0xb4, 0x06, 0x66, 0xfd, 0x1c, + }, + }, + { + .data_len = 63, + .digest = { + 0x29, 0x37, 0xb8, 0xe5, 0xd8, 0x27, 0x4d, 0xfb, + 0x83, 0x4f, 0x67, 0xf7, 0xf9, 0xc1, 0x0a, 0x9d, + }, + }, + { + .data_len = 64, + .digest = { + 0x17, 0xa9, 0x06, 0x2c, 0xf3, 0xe8, 0x2e, 0xa6, + 0x6b, 0xb2, 0x1f, 0x5d, 0x94, 0x3c, 0x02, 0xa2, + }, + }, + { + .data_len = 65, + .digest = { + 0x7c, 0x80, 0x74, 0xd7, 0xa1, 0x37, 0x30, 0x64, + 0x3b, 0xa4, 0xa3, 0x98, 0xde, 0x47, 0x10, 0x23, + }, + }, + { + .data_len = 127, + .digest = { + 0x27, 0x3a, 0xcf, 0xf5, 0xaf, 0x9f, 0xd8, 0xd8, + 0x2d, 0x6a, 0x91, 0xfb, 0xb8, 0xfa, 0xbe, 0x0c, + }, + }, + { + .data_len = 128, + .digest = { + 0x97, 0x6e, 0xc4, 0xbe, 0x6b, 0x15, 0xa6, 0x7c, + 0xc4, 0xa2, 0xb8, 0x0a, 0x0e, 0x9c, 0xc7, 0x3a, + }, + }, + { + .data_len = 129, + .digest = { + 0x2b, 0xc3, 0x98, 0xba, 0x6e, 0x42, 0xf8, 0x18, + 0x85, 0x69, 0x15, 0x37, 0x10, 0x60, 0xe6, 0xac, + }, + }, + { + .data_len = 256, + .digest = { + 0x88, 0x21, 0x77, 0x89, 0xd7, 0x93, 0x90, 0xfc, + 0xf3, 0xb0, 0xe3, 0xfb, 0x14, 0xe2, 0xcf, 0x74, + }, + }, + { + .data_len = 511, + .digest = { + 0x66, 0x3d, 0x3e, 0x08, 0xa0, 0x49, 0x81, 0x68, + 0x3e, 0x3b, 0xc8, 0x80, 0x55, 0xd4, 0x15, 0xe9, + }, + }, + { + .data_len = 513, + .digest = { + 0x05, 0xf5, 0x06, 0x66, 0xe7, 0x11, 0x08, 0x84, + 0xff, 0x94, 0x50, 0x85, 0x65, 0x95, 0x2a, 0x20, + }, + }, + { + .data_len = 1000, + .digest = { + 0xd3, 0xa0, 0x51, 0x69, 0xb5, 0x38, 0xae, 0x1b, + 0xe1, 0xa2, 0x89, 0xc6, 0x8d, 0x2b, 0x62, 0x37, + }, + }, + { + .data_len = 3333, + .digest = { + 0x37, 0x6d, 0x6a, 0x14, 0xdc, 0xa5, 0x37, 0xfc, + 0xfe, 0x67, 0x76, 0xb2, 0x64, 0x68, 0x64, 0x05, + }, + }, + { + .data_len = 4096, + .digest = { + 0xe3, 0x12, 0x0c, 0x58, 0x46, 0x45, 0x27, 0x7a, + 0x0e, 0xa2, 0xfa, 0x2c, 0x35, 0x73, 0x6c, 0x94, + }, + }, + { + .data_len = 4128, + .digest = { + 0x63, 0x0d, 0xa1, 0xbc, 0x6e, 0x3e, 0xd3, 0x1d, + 0x28, 0x52, 0xd2, 0xf4, 0x30, 0x2d, 0xff, 0xc4, + }, + }, + { + .data_len = 4160, + .digest = { + 0xb2, 0x91, 0x49, 0xe2, 0x02, 0x98, 0x00, 0x79, + 0x71, 0xb9, 0xd7, 0xd4, 0xb5, 0x94, 0x6d, 0x7d, + }, + }, + { + .data_len = 4224, + .digest = { + 0x58, 0x96, 0x48, 0x69, 0x05, 0x17, 0xe1, 0x6d, + 0xbc, 0xf2, 0x3d, 0x10, 0x96, 0x00, 0x74, 0x58, + }, + }, + { + .data_len = 16384, + .digest = { + 0x99, 0x3c, 0xcb, 0x4d, 0x64, 0xc9, 0xa9, 0x41, + 0x52, 0x93, 0xfd, 0x65, 0xc4, 0xcc, 0xa5, 0xe5, + }, + }, +}; + +static const u8 hash_testvec_consolidated[POLYVAL_DIGEST_SIZE] = { + 0xdf, 0x68, 0x52, 0x99, 0x92, 0xc3, 0xe8, 0x88, + 0x29, 0x13, 0xc8, 0x35, 0x67, 0xa3, 0xd3, 0xad, +}; + +static const u8 polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE] = { + 0xd5, 0xf7, 0xfd, 0xb2, 0xa6, 0xef, 0x0b, 0x85, + 0x0d, 0x0a, 0x06, 0x10, 0xbc, 0x64, 0x94, 0x73, +}; diff --git a/lib/crypto/tests/polyval_kunit.c b/lib/crypto/tests/polyval_kunit.c new file mode 100644 index 000000000000..e59f598c1572 --- /dev/null +++ b/lib/crypto/tests/polyval_kunit.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2025 Google LLC + */ +#include +#include "polyval-testvecs.h" + +/* + * A fixed key used when presenting POLYVAL as an unkeyed hash function in order + * to reuse hash-test-template.h. At the beginning of the test suite, this is + * initialized to a key prepared from bytes generated from a fixed seed. + */ +static struct polyval_key test_key; + +static void polyval_init_withtestkey(struct polyval_ctx *ctx) +{ + polyval_init(ctx, &test_key); +} + +static void polyval_withtestkey(const u8 *data, size_t len, + u8 out[POLYVAL_BLOCK_SIZE]) +{ + polyval(&test_key, data, len, out); +} + +/* Generate the HASH_KUNIT_CASES using hash-test-template.h. */ +#define HASH polyval_withtestkey +#define HASH_CTX polyval_ctx +#define HASH_SIZE POLYVAL_BLOCK_SIZE +#define HASH_INIT polyval_init_withtestkey +#define HASH_UPDATE polyval_update +#define HASH_FINAL polyval_final +#include "hash-test-template.h" + +/* + * Test an example from RFC8452 ("AES-GCM-SIV: Nonce Misuse-Resistant + * Authenticated Encryption") to ensure compatibility with that. + */ +static void test_polyval_rfc8452_testvec(struct kunit *test) +{ + static const u8 raw_key[POLYVAL_BLOCK_SIZE] = + "\x31\x07\x28\xd9\x91\x1f\x1f\x38" + "\x37\xb2\x43\x16\xc3\xfa\xb9\xa0"; + static const u8 data[48] = + "\x65\x78\x61\x6d\x70\x6c\x65\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x48\x65\x6c\x6c\x6f\x20\x77\x6f" + "\x72\x6c\x64\x00\x00\x00\x00\x00" + "\x38\x00\x00\x00\x00\x00\x00\x00" + "\x58\x00\x00\x00\x00\x00\x00\x00"; + static const u8 expected_hash[POLYVAL_BLOCK_SIZE] = + "\xad\x7f\xcf\x0b\x51\x69\x85\x16" + "\x62\x67\x2f\x3c\x5f\x95\x13\x8f"; + u8 hash[POLYVAL_BLOCK_SIZE]; + struct polyval_key key; + + polyval_preparekey(&key, raw_key); + polyval(&key, data, sizeof(data), hash); + KUNIT_ASSERT_MEMEQ(test, hash, expected_hash, sizeof(hash)); +} + +/* + * Test a key and messages containing all one bits. This is useful to detect + * overflow bugs in implementations that emulate carryless multiplication using + * a series of standard multiplications with the bits spread out. + */ +static void test_polyval_allones_key_and_message(struct kunit *test) +{ + struct polyval_key key; + struct polyval_ctx hashofhashes_ctx; + u8 hash[POLYVAL_BLOCK_SIZE]; + + static_assert(TEST_BUF_LEN >= 4096); + memset(test_buf, 0xff, 4096); + + polyval_preparekey(&key, test_buf); + polyval_init(&hashofhashes_ctx, &key); + for (size_t len = 0; len <= 4096; len += 16) { + polyval(&key, test_buf, len, hash); + polyval_update(&hashofhashes_ctx, hash, sizeof(hash)); + } + polyval_final(&hashofhashes_ctx, hash); + KUNIT_ASSERT_MEMEQ(test, hash, polyval_allones_hashofhashes, + sizeof(hash)); +} + +#define MAX_LEN_FOR_KEY_CHECK 1024 + +/* + * Given two prepared keys which should be identical (but may differ in + * alignment and/or whether they are followed by a guard page or not), verify + * that they produce consistent results on various data lengths. + */ +static void check_key_consistency(struct kunit *test, + const struct polyval_key *key1, + const struct polyval_key *key2) +{ + u8 *data = test_buf; + u8 hash1[POLYVAL_BLOCK_SIZE]; + u8 hash2[POLYVAL_BLOCK_SIZE]; + + rand_bytes(data, MAX_LEN_FOR_KEY_CHECK); + KUNIT_ASSERT_MEMEQ(test, key1, key2, sizeof(*key1)); + + for (int i = 0; i < 100; i++) { + size_t len = rand_length(MAX_LEN_FOR_KEY_CHECK); + + polyval(key1, data, len, hash1); + polyval(key2, data, len, hash2); + KUNIT_ASSERT_MEMEQ(test, hash1, hash2, sizeof(hash1)); + } +} + +/* Test that no buffer overreads occur on either raw_key or polyval_key. */ +static void test_polyval_with_guarded_key(struct kunit *test) +{ + u8 raw_key[POLYVAL_BLOCK_SIZE]; + u8 *guarded_raw_key = &test_buf[TEST_BUF_LEN - sizeof(raw_key)]; + struct polyval_key key1, key2; + struct polyval_key *guarded_key = + (struct polyval_key *)&test_buf[TEST_BUF_LEN - sizeof(key1)]; + + /* Prepare with regular buffers. */ + rand_bytes(raw_key, sizeof(raw_key)); + polyval_preparekey(&key1, raw_key); + + /* Prepare with guarded raw_key, then check that it works. */ + memcpy(guarded_raw_key, raw_key, sizeof(raw_key)); + polyval_preparekey(&key2, guarded_raw_key); + check_key_consistency(test, &key1, &key2); + + /* Prepare guarded polyval_key, then check that it works. */ + polyval_preparekey(guarded_key, raw_key); + check_key_consistency(test, &key1, guarded_key); +} + +/* + * Test that polyval_key only needs to be aligned to + * __alignof__(struct polyval_key), i.e. 8 bytes. The assembly code may prefer + * 16-byte or higher alignment, but it musn't require it. + */ +static void test_polyval_with_minimally_aligned_key(struct kunit *test) +{ + u8 raw_key[POLYVAL_BLOCK_SIZE]; + struct polyval_key key; + struct polyval_key *minaligned_key = + (struct polyval_key *)&test_buf[MAX_LEN_FOR_KEY_CHECK + + __alignof__(struct polyval_key)]; + + KUNIT_ASSERT_TRUE(test, IS_ALIGNED((uintptr_t)minaligned_key, + __alignof__(struct polyval_key))); + KUNIT_ASSERT_TRUE(test, + !IS_ALIGNED((uintptr_t)minaligned_key, + 2 * __alignof__(struct polyval_key))); + + rand_bytes(raw_key, sizeof(raw_key)); + polyval_preparekey(&key, raw_key); + polyval_preparekey(minaligned_key, raw_key); + check_key_consistency(test, &key, minaligned_key); +} + +struct polyval_irq_test_state { + struct polyval_key expected_key; + u8 raw_key[POLYVAL_BLOCK_SIZE]; +}; + +static bool polyval_irq_test_func(void *state_) +{ + struct polyval_irq_test_state *state = state_; + struct polyval_key key; + + polyval_preparekey(&key, state->raw_key); + return memcmp(&key, &state->expected_key, sizeof(key)) == 0; +} + +/* + * Test that polyval_preparekey() produces the same output regardless of whether + * FPU or vector registers are usable when it is called. + */ +static void test_polyval_preparekey_in_irqs(struct kunit *test) +{ + struct polyval_irq_test_state state; + + rand_bytes(state.raw_key, sizeof(state.raw_key)); + polyval_preparekey(&state.expected_key, state.raw_key); + kunit_run_irq_test(test, polyval_irq_test_func, 20000, &state); +} + +static int polyval_suite_init(struct kunit_suite *suite) +{ + u8 raw_key[POLYVAL_BLOCK_SIZE]; + + rand_bytes_seeded_from_len(raw_key, sizeof(raw_key)); + polyval_preparekey(&test_key, raw_key); + return hash_suite_init(suite); +} + +static void polyval_suite_exit(struct kunit_suite *suite) +{ + hash_suite_exit(suite); +} + +static struct kunit_case polyval_test_cases[] = { + HASH_KUNIT_CASES, + KUNIT_CASE(test_polyval_rfc8452_testvec), + KUNIT_CASE(test_polyval_allones_key_and_message), + KUNIT_CASE(test_polyval_with_guarded_key), + KUNIT_CASE(test_polyval_with_minimally_aligned_key), + KUNIT_CASE(test_polyval_preparekey_in_irqs), + KUNIT_CASE(benchmark_hash), + {}, +}; + +static struct kunit_suite polyval_test_suite = { + .name = "polyval", + .test_cases = polyval_test_cases, + .suite_init = polyval_suite_init, + .suite_exit = polyval_suite_exit, +}; +kunit_test_suite(polyval_test_suite); + +MODULE_DESCRIPTION("KUnit tests and benchmark for POLYVAL"); +MODULE_LICENSE("GPL"); diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py index ae2682882cd1..c1d0517140bd 100755 --- a/scripts/crypto/gen-hash-testvecs.py +++ b/scripts/crypto/gen-hash-testvecs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-or-later # -# Script that generates test vectors for the given cryptographic hash function. +# Script that generates test vectors for the given hash function. # # Copyright 2025 Google LLC @@ -50,11 +50,42 @@ class Poly1305: m = (self.h + self.s) % 2**128 return m.to_bytes(16, byteorder='little') +POLYVAL_POLY = sum((1 << i) for i in [128, 127, 126, 121, 0]) +POLYVAL_BLOCK_SIZE = 16 + +# A straightforward, unoptimized implementation of POLYVAL. +# Reference: https://datatracker.ietf.org/doc/html/rfc8452 +class Polyval: + def __init__(self, key): + assert len(key) == 16 + self.h = int.from_bytes(key, byteorder='little') + self.acc = 0 + + # Note: this supports partial blocks only at the end. + def update(self, data): + for i in range(0, len(data), 16): + # acc += block + self.acc ^= int.from_bytes(data[i:i+16], byteorder='little') + # acc = (acc * h * x^-128) mod POLYVAL_POLY + product = 0 + for j in range(128): + if (self.h & (1 << j)) != 0: + product ^= self.acc << j + if (product & (1 << j)) != 0: + product ^= POLYVAL_POLY << j + self.acc = product >> 128 + return self + + def digest(self): + return self.acc.to_bytes(16, byteorder='little') + def hash_init(alg): if alg == 'poly1305': # Use a fixed random key here, to present Poly1305 as an unkeyed hash. # This allows all the test cases for unkeyed hashes to work on Poly1305. return Poly1305(rand_bytes(POLY1305_KEY_SIZE)) + if alg == 'polyval': + return Polyval(rand_bytes(POLYVAL_BLOCK_SIZE)) return hashlib.new(alg) def hash_update(ctx, data): @@ -165,9 +196,18 @@ def gen_additional_poly1305_testvecs(): 'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]', Poly1305(key).update(data).digest()) +def gen_additional_polyval_testvecs(): + key = b'\xff' * POLYVAL_BLOCK_SIZE + hashes = b'' + for data_len in range(0, 4097, 16): + hashes += Polyval(key).update(b'\xff' * data_len).digest() + print_static_u8_array_definition( + 'polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE]', + Polyval(key).update(hashes).digest()) + if len(sys.argv) != 2: sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n') - sys.stderr.write('ALGORITHM may be any supported by Python hashlib, or poly1305 or sha3.\n') + sys.stderr.write('ALGORITHM may be any supported by Python hashlib; or poly1305, polyval, or sha3.\n') sys.stderr.write('Example: gen-hash-testvecs.py sha512\n') sys.exit(1) @@ -180,6 +220,9 @@ if alg.startswith('blake2'): elif alg == 'poly1305': gen_unkeyed_testvecs(alg) gen_additional_poly1305_testvecs() +elif alg == 'polyval': + gen_unkeyed_testvecs(alg) + gen_additional_polyval_testvecs() elif alg == 'sha3': print() print('/* SHA3-256 test vectors */') -- cgit v1.2.3 From fc387a0704cc86cf47de0e64236577af3e148ec2 Mon Sep 17 00:00:00 2001 From: Onur Özkan Date: Wed, 17 Sep 2025 20:37:24 +0300 Subject: checkpatch: detect unhandled placeholders in cover letters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new check PLACEHOLDER_USE to detect unhandled placeholders. This prevents sending patch series with incomplete patches (mostly in cover letters) containing auto generated subject or blurb lines. These placeholders can be seen on mailing lists. With this change, checkpatch will emit an error when such text is found. Link: https://lkml.kernel.org/r/20250917173725.22547-2-work@onurozkan.dev Signed-off-by: Onur Özkan Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Jonathan Corbet Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'scripts') diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 92669904eecc..6729f18e5654 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3345,6 +3345,13 @@ sub process { } } +# Check for auto-generated unhandled placeholder text (mostly for cover letters) + if (($in_commit_log || $in_header_lines) && + $rawline =~ /(?:SUBJECT|BLURB) HERE/) { + ERROR("PLACEHOLDER_USE", + "Placeholder text detected\n" . $herecurr); + } + # Check for git id commit length and improperly formed commit descriptions # A correctly formed commit description is: # commit ("Complete commit subject") -- cgit v1.2.3 From 4ab2ee307983548b29ddaab0ecaef82d526cf4c9 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Tue, 11 Nov 2025 08:43:51 +0200 Subject: kbuild: install-extmod-build: Properly fix CC expansion when ccache is used Currently, when cross-compiling and ccache is used, the expanding of CC turns out to be without any quotes, leading to the following error: make[4]: *** No rule to make target 'aarch64-linux-gnu-gcc'. Stop. make[3]: *** [Makefile:2164: run-command] Error 2 And it makes sense, because after expansion it ends up like this: make run-command KBUILD_RUN_COMMAND=+$(MAKE) \ HOSTCC=ccache aarch64-linux-gnu-gcc VPATH= srcroot=. $(build)= ... So add another set of double quotes to surround whatever CC expands to to make sure the aarch64-linux-gnu-gcc isn't expanded to something that looks like an entirely separate target. Fixes: 140332b6ed72 ("kbuild: fix linux-headers package build when $(CC) cannot link userspace") Signed-off-by: Abel Vesa Reviewed-by: Nicolas Schier Link: https://patch.msgid.link/20251111-kbuild-install-extmod-build-fix-cc-expand-third-try-v2-1-15ba1b37e71a@linaro.org Signed-off-by: Nathan Chancellor --- scripts/package/install-extmod-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index 054fdf45cc37..2576cf7902db 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"$(realpath --relative-to=. "${destdir}")"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="'"${CC}"'" VPATH= srcroot=. $(build)='"$(realpath --relative-to=. "${destdir}")"/scripts rm -f "${destdir}/scripts/Kbuild" fi -- cgit v1.2.3 From f64c7e113dc937e0987c83ef51a3cd52a2c277c7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Nov 2025 10:53:43 +0100 Subject: scripts: docs: kdoc_files.py: don't consider symlinks as directories As reported by Randy, currently kdoc_files can go into endless looks when symlinks are used: $ ln -s . Documentation/peci/foo $ ./scripts/kernel-doc Documentation/peci/ ... File "/new_devel/docs/scripts/lib/kdoc/kdoc_files.py", line 52, in _parse_dir if entry.is_dir(): ~~~~~~~~~~~~^^ OSError: [Errno 40] Too many levels of symbolic links: 'Documentation/peci/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo' Prevent that by not considering symlinks as directories. Reported-by: Randy Dunlap Closes: https://lore.kernel.org/linux-doc/80701524-09fd-4d68-8715-331f47c969f2@infradead.org/ Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Jonathan Corbet Message-ID: <73c3450f34e2a4b42ef2ef279d7487c47d22e3bd.1763027622.git.mchehab+huawei@kernel.org> --- scripts/lib/kdoc/kdoc_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py index 061c033f32da..1fd8d17edb32 100644 --- a/scripts/lib/kdoc/kdoc_files.py +++ b/scripts/lib/kdoc/kdoc_files.py @@ -49,7 +49,7 @@ class GlobSourceFiles: for entry in obj: name = os.path.join(dirname, entry.name) - if entry.is_dir(): + if entry.is_dir(follow_symlinks=False): yield from self._parse_dir(name) if not entry.is_file(): -- cgit v1.2.3 From d81d9d389b9b73acd68f300c8889c7fa1acd4977 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 14 Nov 2025 14:43:56 +0100 Subject: kbuild: don't enable CC_CAN_LINK if the dummy program generates warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible that the kernel toolchain generates warnings when used together with the system toolchain. This happens for example when the older kernel toolchain does not handle new versions of sframe debug information. While these warnings where ignored during the evaluation of CC_CAN_LINK, together with CONFIG_WERROR the actual userprog build will later fail. Example warning: .../x86_64-linux/13.2.0/../../../../x86_64-linux/bin/ld: error in /lib/../lib64/crt1.o(.sframe); no .sframe will be created collect2: error: ld returned 1 exit status Make sure that the very simple example program does not generate warnings already to avoid breaking the userprog compilations. Fixes: ec4a3992bc0b ("kbuild: respect CONFIG_WERROR for linker and assembler") Fixes: 3f0ff4cc6ffb ("kbuild: respect CONFIG_WERROR for userprogs") Signed-off-by: Thomas Weißschuh Reviewed-by: Nicolas Schier Reviewed-by: Nathan Chancellor Link: https://patch.msgid.link/20251114-kbuild-userprogs-bits-v3-1-4dee0d74d439@linutronix.de Signed-off-by: Nicolas Schier --- scripts/cc-can-link.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/cc-can-link.sh b/scripts/cc-can-link.sh index 6efcead31989..e67fd8d7b684 100755 --- a/scripts/cc-can-link.sh +++ b/scripts/cc-can-link.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -cat << "END" | $@ -x c - -o /dev/null >/dev/null 2>&1 +cat << "END" | $@ -Werror -Wl,--fatal-warnings -x c - -o /dev/null >/dev/null 2>&1 #include int main(void) { -- cgit v1.2.3 From 80623f2c83d7de5c289d4240b8f4cef4103c51fc Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 14 Nov 2025 14:43:57 +0100 Subject: init: deduplicate cc-can-link.sh invocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The command to invoke scripts/cc-can-link.sh is very long and new usages are about to be added. Add a helper variable to make the code easier to read and maintain. Signed-off-by: Thomas Weißschuh Reviewed-by: Nicolas Schier Reviewed-by: Nathan Chancellor Link: https://patch.msgid.link/20251114-kbuild-userprogs-bits-v3-2-4dee0d74d439@linutronix.de Signed-off-by: Nicolas Schier --- init/Kconfig | 4 ++-- scripts/Kconfig.include | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/init/Kconfig b/init/Kconfig index cab3ad28ca49..7b722e714d5c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -84,8 +84,8 @@ config RUSTC_LLVM_VERSION config CC_CAN_LINK bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag)) + default $(cc_can_link_user,$(m64-flag)) if 64BIT + default $(cc_can_link_user,$(m32-flag)) # Fixed in GCC 14, 13.3, 12.4 and 11.5 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index 33193ca6e803..d42042b6c9e2 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -65,6 +65,9 @@ cc-option-bit = $(if-success,$(CC) -Werror $(1) -E -x c /dev/null -o /dev/null,$ m32-flag := $(cc-option-bit,-m32) m64-flag := $(cc-option-bit,-m64) +# Test whether the compiler can link userspace applications +cc_can_link_user = $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(1)) + rustc-version := $(shell,$(srctree)/scripts/rustc-version.sh $(RUSTC)) rustc-llvm-version := $(shell,$(srctree)/scripts/rustc-llvm-version.sh $(RUSTC)) -- cgit v1.2.3 From ab844cf32058d6ba9bd98a921afa2256085e21fc Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 10 Nov 2025 12:35:27 +0100 Subject: rust: allow `unreachable_pub` for doctests Examples (i.e. doctests) may want to show public items such as structs, thus the `unreachable_pub` warning is not very helpful. Thus allow it for all doctests. In addition, remove it from the existing `expect`s we have in a couple doctests. Suggested-by: Alice Ryhl Link: https://lore.kernel.org/rust-for-linux/aRG9VjsaCjsvAwUn@google.com/ Reviewed-by: David Gow Acked-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20251110113528.1658238-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/init.rs | 2 +- rust/kernel/types.rs | 2 +- scripts/rustdoc_test_gen.rs | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 4949047af8d7..e476d81c1a27 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -67,7 +67,7 @@ //! ``` //! //! ```rust -//! # #![expect(unreachable_pub, clippy::disallowed_names)] +//! # #![expect(clippy::disallowed_names)] //! use kernel::{prelude::*, types::Opaque}; //! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin}; //! # mod bindings { diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index dc0a02f5c3cf..835824788506 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -289,7 +289,7 @@ impl Drop for ScopeGuard { /// # Examples /// /// ``` -/// # #![expect(unreachable_pub, clippy::disallowed_names)] +/// # #![expect(clippy::disallowed_names)] /// use kernel::types::Opaque; /// # // Emulate a C struct binding which is from C, maybe uninitialized or not, only the C side /// # // knows. diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index c8f9dc2ab976..0e6a0542d1bd 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -208,6 +208,7 @@ pub extern "C" fn {kunit_name}(__kunit_test: *mut ::kernel::bindings::kunit) {{ #[allow(unused)] static __DOCTEST_ANCHOR: i32 = ::core::line!() as i32 + {body_offset} + 1; {{ + #![allow(unreachable_pub)] {body} main(); }} -- cgit v1.2.3 From d8c8a575f5aa7afc7cc7718b779c29ae5f7abc58 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 14 Nov 2025 16:25:37 -0600 Subject: kbuild: Ensure .dtbo targets are applied to a base .dtb It is a requirement that DT overlays in the kernel are applied at build time to a base DTB in order to validate they can be applied and to validate them against the DT schemas. DT overlays on their own may be incomplete and can't be validated. Add a kbuild check so this doesn't have to be checked and fixed periodically. Signed-off-by: Rob Herring (Arm) --- scripts/Makefile.dtbs | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'scripts') diff --git a/scripts/Makefile.dtbs b/scripts/Makefile.dtbs index 2d321b813600..e092b460d5a1 100644 --- a/scripts/Makefile.dtbs +++ b/scripts/Makefile.dtbs @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only +all-dtb := $(dtb-y) $(dtb-) + # If CONFIG_OF_ALL_DTBS is enabled, all DT blobs are built dtb-$(CONFIG_OF_ALL_DTBS) += $(dtb-) @@ -10,6 +12,13 @@ real-dtb-y := $(call real-search, $(dtb-y), .dtb, -dtbs) # Base DTB that overlay is applied onto base-dtb-y := $(filter %.dtb, $(call real-search, $(multi-dtb-y), .dtb, -dtbs)) +# Ensure that any .dtbo is applied to at least one base .dtb. Otherwise, it +# does not get validated. +applied-dtbo := $(filter %.dtbo, \ + $(call real-search, $(call multi-search, $(all-dtb), .dtb, -dtbs), .dtb, -dtbs)) +unapplied-dtbo := $(filter-out $(applied-dtbo),$(filter %.dtbo, $(dtb-y))) +$(if $(unapplied-dtbo), $(warning .dtbo is not applied to any base: $(unapplied-dtbo))) + dtb-y := $(addprefix $(obj)/, $(dtb-y)) multi-dtb-y := $(addprefix $(obj)/, $(multi-dtb-y)) real-dtb-y := $(addprefix $(obj)/, $(real-dtb-y)) -- cgit v1.2.3 From e5d330e13f67d574f683c052c9a342814fd8fa39 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 17 Nov 2025 09:07:13 +0100 Subject: rust: allow `clippy::disallowed_names` for doctests Examples (i.e. doctests) may want to use names such as `foo`, thus the `clippy::disallowed_names` lint [1] gets in the way. Thus allow it for all doctests. In addition, remove it from the existing `expect`s we have in a few doctests. This does not mean that we should stop trying to find good names for our examples, though. Link: https://rust-lang.github.io/rust-clippy/stable/index.html#disallowed_names [1] Suggested-by: Alice Ryhl Link: https://lore.kernel.org/rust-for-linux/aRHSLChi5HYXW4-9@google.com/ Reviewed-by: Alice Ryhl Acked-by: Benno Lossin Link: https://patch.msgid.link/20251117080714.876978-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/init.rs | 3 +-- rust/kernel/types.rs | 1 - scripts/rustdoc_test_gen.rs | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'scripts') diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index e476d81c1a27..899b9a962762 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -30,7 +30,7 @@ //! ## General Examples //! //! ```rust -//! # #![expect(clippy::disallowed_names, clippy::undocumented_unsafe_blocks)] +//! # #![expect(clippy::undocumented_unsafe_blocks)] //! use kernel::types::Opaque; //! use pin_init::pin_init_from_closure; //! @@ -67,7 +67,6 @@ //! ``` //! //! ```rust -//! # #![expect(clippy::disallowed_names)] //! use kernel::{prelude::*, types::Opaque}; //! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin}; //! # mod bindings { diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 835824788506..9c5e7dbf1632 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -289,7 +289,6 @@ impl Drop for ScopeGuard { /// # Examples /// /// ``` -/// # #![expect(clippy::disallowed_names)] /// use kernel::types::Opaque; /// # // Emulate a C struct binding which is from C, maybe uninitialized or not, only the C side /// # // knows. diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index 0e6a0542d1bd..be0561049660 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -208,7 +208,7 @@ pub extern "C" fn {kunit_name}(__kunit_test: *mut ::kernel::bindings::kunit) {{ #[allow(unused)] static __DOCTEST_ANCHOR: i32 = ::core::line!() as i32 + {body_offset} + 1; {{ - #![allow(unreachable_pub)] + #![allow(unreachable_pub, clippy::disallowed_names)] {body} main(); }} -- cgit v1.2.3 From 2092007aa32f8dd968c38751bd1b7cac9b1f738d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 12 Nov 2025 15:32:34 -0800 Subject: objtool/klp: Only enable --checksum when needed With CONFIG_KLP_BUILD enabled, checksums are only needed during a klp-build run. There's no need to enable them for normal kernel builds. This also has the benefit of softening the xxhash dependency. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Link: https://patch.msgid.link/edbb1ca215e4926e02edb493b68b9d6d063e902f.1762990139.git.jpoimboe@kernel.org --- arch/x86/boot/startup/Makefile | 2 +- scripts/Makefile.lib | 1 - scripts/livepatch/klp-build | 4 ++++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile index e8fdf020b422..5e499cfb29b5 100644 --- a/arch/x86/boot/startup/Makefile +++ b/arch/x86/boot/startup/Makefile @@ -36,7 +36,7 @@ $(patsubst %.o,$(obj)/%.o,$(lib-y)): OBJECT_FILES_NON_STANDARD := y # relocations, even if other objtool actions are being deferred. # $(pi-objs): objtool-enabled = 1 -$(pi-objs): objtool-args = $(if $(delay-objtool),,$(objtool-args-y)) --noabs +$(pi-objs): objtool-args = $(if $(delay-objtool),--dry-run,$(objtool-args-y)) --noabs # # Confine the startup code by prefixing all symbols with __pi_ (for position diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index f4b33919ec37..28a1c08e3b22 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -173,7 +173,6 @@ ifdef CONFIG_OBJTOOL objtool := $(objtree)/tools/objtool/objtool -objtool-args-$(CONFIG_KLP_BUILD) += --checksum objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr objtool-args-$(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) += --hacks=skylake diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build index 881e052e7fae..882272120c9e 100755 --- a/scripts/livepatch/klp-build +++ b/scripts/livepatch/klp-build @@ -489,8 +489,11 @@ clean_kernel() { build_kernel() { local log="$TMP_DIR/build.log" + local objtool_args=() local cmd=() + objtool_args=("--checksum") + cmd=("make") # When a patch to a kernel module references a newly created unexported @@ -513,6 +516,7 @@ build_kernel() { cmd+=("$VERBOSE") cmd+=("-j$JOBS") cmd+=("KCFLAGS=-ffunction-sections -fdata-sections") + cmd+=("OBJTOOL_ARGS=${objtool_args[*]}") cmd+=("vmlinux") cmd+=("modules") -- cgit v1.2.3 From 778b8ebe5192e7a7f00563a7456517dfa63e1d90 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 10 Nov 2025 15:04:29 -0700 Subject: docs: Move the python libraries to tools/lib/python "scripts/lib" was always a bit of an awkward place for Python modules. We already have tools/lib; create a tools/lib/python, move the libraries there, and update the users accordingly. While at it, move the contents of tools/docs/lib. Rather than make another directory, just put these documentation-oriented modules under "kdoc". Signed-off-by: Jonathan Corbet Message-ID: <20251110220430.726665-2-corbet@lwn.net> --- .pylintrc | 2 +- Documentation/Makefile | 2 +- Documentation/sphinx/kernel_abi.py | 2 +- Documentation/sphinx/kernel_include.py | 2 +- Documentation/sphinx/kerneldoc.py | 2 +- MAINTAINERS | 3 +- scripts/jobserver-exec | 2 +- scripts/kernel-doc.py | 2 +- scripts/lib/abi/abi_parser.py | 628 ---------- scripts/lib/abi/abi_regex.py | 234 ---- scripts/lib/abi/helpers.py | 38 - scripts/lib/abi/system_symbols.py | 378 ------ scripts/lib/jobserver.py | 149 --- scripts/lib/kdoc/kdoc_files.py | 294 ----- scripts/lib/kdoc/kdoc_item.py | 43 - scripts/lib/kdoc/kdoc_output.py | 824 ------------- scripts/lib/kdoc/kdoc_parser.py | 1667 --------------------------- scripts/lib/kdoc/kdoc_re.py | 270 ----- tools/docs/check-variable-fonts.py | 8 +- tools/docs/get_abi.py | 2 +- tools/docs/lib/__init__.py | 0 tools/docs/lib/enrich_formatter.py | 70 -- tools/docs/lib/latex_fonts.py | 167 --- tools/docs/lib/parse_data_structs.py | 482 -------- tools/docs/lib/python_version.py | 178 --- tools/docs/parse-headers.py | 9 +- tools/docs/sphinx-build-wrapper | 7 +- tools/docs/sphinx-pre-install | 5 +- tools/lib/python/abi/abi_parser.py | 628 ++++++++++ tools/lib/python/abi/abi_regex.py | 234 ++++ tools/lib/python/abi/helpers.py | 38 + tools/lib/python/abi/system_symbols.py | 378 ++++++ tools/lib/python/jobserver.py | 149 +++ tools/lib/python/kdoc/enrich_formatter.py | 70 ++ tools/lib/python/kdoc/kdoc_files.py | 294 +++++ tools/lib/python/kdoc/kdoc_item.py | 43 + tools/lib/python/kdoc/kdoc_output.py | 824 +++++++++++++ tools/lib/python/kdoc/kdoc_parser.py | 1667 +++++++++++++++++++++++++++ tools/lib/python/kdoc/kdoc_re.py | 270 +++++ tools/lib/python/kdoc/latex_fonts.py | 167 +++ tools/lib/python/kdoc/parse_data_structs.py | 482 ++++++++ tools/lib/python/kdoc/python_version.py | 178 +++ 42 files changed, 5451 insertions(+), 5441 deletions(-) delete mode 100644 scripts/lib/abi/abi_parser.py delete mode 100644 scripts/lib/abi/abi_regex.py delete mode 100644 scripts/lib/abi/helpers.py delete mode 100644 scripts/lib/abi/system_symbols.py delete mode 100755 scripts/lib/jobserver.py delete mode 100644 scripts/lib/kdoc/kdoc_files.py delete mode 100644 scripts/lib/kdoc/kdoc_item.py delete mode 100644 scripts/lib/kdoc/kdoc_output.py delete mode 100644 scripts/lib/kdoc/kdoc_parser.py delete mode 100644 scripts/lib/kdoc/kdoc_re.py delete mode 100644 tools/docs/lib/__init__.py delete mode 100644 tools/docs/lib/enrich_formatter.py delete mode 100755 tools/docs/lib/latex_fonts.py delete mode 100755 tools/docs/lib/parse_data_structs.py delete mode 100644 tools/docs/lib/python_version.py create mode 100644 tools/lib/python/abi/abi_parser.py create mode 100644 tools/lib/python/abi/abi_regex.py create mode 100644 tools/lib/python/abi/helpers.py create mode 100644 tools/lib/python/abi/system_symbols.py create mode 100755 tools/lib/python/jobserver.py create mode 100644 tools/lib/python/kdoc/enrich_formatter.py create mode 100644 tools/lib/python/kdoc/kdoc_files.py create mode 100644 tools/lib/python/kdoc/kdoc_item.py create mode 100644 tools/lib/python/kdoc/kdoc_output.py create mode 100644 tools/lib/python/kdoc/kdoc_parser.py create mode 100644 tools/lib/python/kdoc/kdoc_re.py create mode 100755 tools/lib/python/kdoc/latex_fonts.py create mode 100755 tools/lib/python/kdoc/parse_data_structs.py create mode 100644 tools/lib/python/kdoc/python_version.py (limited to 'scripts') diff --git a/.pylintrc b/.pylintrc index 89eaf2100edd..8c6fc2b628b3 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,2 +1,2 @@ [MASTER] -init-hook='import sys; sys.path += ["scripts/lib/kdoc", "scripts/lib/abi", "tools/docs/lib"]' +init-hook='import sys; sys.path += ["tools/lib/python"]' diff --git a/Documentation/Makefile b/Documentation/Makefile index c66df29cf0a3..fda2bef8d9d8 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -115,6 +115,6 @@ dochelp: @echo ' make PAPER={a4|letter} Specifies the paper size used for LaTeX/PDF output.' @echo @echo ' make FONTS_CONF_DENY_VF={path} sets a deny list to block variable Noto CJK fonts' - @echo ' for PDF build. See tools/docs/lib/latex_fonts.py for more details' + @echo ' for PDF build. See tools/lib/python/kdoc/latex_fonts.py for more details' @echo @echo ' Default location for the generated documents is Documentation/output' diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index 32e39fb8bc3b..7ec832da8444 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -43,7 +43,7 @@ from sphinx.util.docutils import switch_source_input from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "scripts/lib/abi")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python/abi")) from abi_parser import AbiParser diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py index 75e139287d50..a12455daa6d7 100755 --- a/Documentation/sphinx/kernel_include.py +++ b/Documentation/sphinx/kernel_include.py @@ -97,7 +97,7 @@ from docutils.parsers.rst.directives.body import CodeBlock, NumberLines from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/docs/lib")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc")) from parse_data_structs import ParseDataStructs diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index 2586b4d4e494..56f382a6bdf1 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -42,7 +42,7 @@ from sphinx.util import logging from pprint import pformat srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "scripts/lib/kdoc")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc")) from kdoc_files import KernelFiles from kdoc_output import RestFormat diff --git a/MAINTAINERS b/MAINTAINERS index 8a9411e5c1e1..efe98e680c14 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7412,8 +7412,7 @@ P: Documentation/doc-guide/maintainer-profile.rst T: git git://git.lwn.net/linux.git docs-next F: Documentation/ F: scripts/kernel-doc* -F: scripts/lib/abi/* -F: scripts/lib/kdoc/* +F: tools/lib/python/* F: tools/docs/ F: tools/net/ynl/pyynl/lib/doc_generator.py X: Documentation/ABI/ diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index ae23afd344ec..758e947a6fb9 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -13,7 +13,7 @@ See: import os import sys -LIB_DIR = "lib" +LIB_DIR = "../tools/lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index d9fe2bcbd39c..bb24bbf73167 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -111,7 +111,7 @@ import sys # Import Python modules -LIB_DIR = "lib/kdoc" +LIB_DIR = "../tools/lib/python/kdoc" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) diff --git a/scripts/lib/abi/abi_parser.py b/scripts/lib/abi/abi_parser.py deleted file mode 100644 index 66a738013ce1..000000000000 --- a/scripts/lib/abi/abi_parser.py +++ /dev/null @@ -1,628 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Parse ABI documentation and produce results from it. -""" - -from argparse import Namespace -import logging -import os -import re - -from pprint import pformat -from random import randrange, seed - -# Import Python modules - -from helpers import AbiDebug, ABI_DIR - - -class AbiParser: - """Main class to parse ABI files""" - - TAGS = r"(what|where|date|kernelversion|contact|description|users)" - XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)" - - def __init__(self, directory, logger=None, - enable_lineno=False, show_warnings=True, debug=0): - """Stores arguments for the class and initialize class vars""" - - self.directory = directory - self.enable_lineno = enable_lineno - self.show_warnings = show_warnings - self.debug = debug - - if not logger: - self.log = logging.getLogger("get_abi") - else: - self.log = logger - - self.data = {} - self.what_symbols = {} - self.file_refs = {} - self.what_refs = {} - - # Ignore files that contain such suffixes - self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~") - - # Regular expressions used on parser - self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR) - self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I) - self.re_valid = re.compile(self.TAGS) - self.re_start_spc = re.compile(r"(\s*)(\S.*)") - self.re_whitespace = re.compile(r"^\s+") - - # Regular used on print - self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})") - self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])") - self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)") - self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n") - self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst") - self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)") - self.re_xref_node = re.compile(self.XREF) - - def warn(self, fdata, msg, extra=None): - """Displays a parse error if warning is enabled""" - - if not self.show_warnings: - return - - msg = f"{fdata.fname}:{fdata.ln}: {msg}" - if extra: - msg += "\n\t\t" + extra - - self.log.warning(msg) - - def add_symbol(self, what, fname, ln=None, xref=None): - """Create a reference table describing where each 'what' is located""" - - if what not in self.what_symbols: - self.what_symbols[what] = {"file": {}} - - if fname not in self.what_symbols[what]["file"]: - self.what_symbols[what]["file"][fname] = [] - - if ln and ln not in self.what_symbols[what]["file"][fname]: - self.what_symbols[what]["file"][fname].append(ln) - - if xref: - self.what_symbols[what]["xref"] = xref - - def _parse_line(self, fdata, line): - """Parse a single line of an ABI file""" - - new_what = False - new_tag = False - content = None - - match = self.re_tag.match(line) - if match: - new = match.group(1).lower() - sep = match.group(2) - content = match.group(3) - - match = self.re_valid.search(new) - if match: - new_tag = match.group(1) - else: - if fdata.tag == "description": - # New "tag" is actually part of description. - # Don't consider it a tag - new_tag = False - elif fdata.tag != "": - self.warn(fdata, f"tag '{fdata.tag}' is invalid", line) - - if new_tag: - # "where" is Invalid, but was a common mistake. Warn if found - if new_tag == "where": - self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead") - new_tag = "what" - - if new_tag == "what": - fdata.space = None - - if content not in self.what_symbols: - self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln) - - if fdata.tag == "what": - fdata.what.append(content.strip("\n")) - else: - if fdata.key: - if "description" not in self.data.get(fdata.key, {}): - self.warn(fdata, f"{fdata.key} doesn't have a description") - - for w in fdata.what: - self.add_symbol(what=w, fname=fdata.fname, - ln=fdata.what_ln, xref=fdata.key) - - fdata.label = content - new_what = True - - key = "abi_" + content.lower() - fdata.key = self.re_unprintable.sub("_", key).strip("_") - - # Avoid duplicated keys but using a defined seed, to make - # the namespace identical if there aren't changes at the - # ABI symbols - seed(42) - - while fdata.key in self.data: - char = randrange(0, 51) + ord("A") - if char > ord("Z"): - char += ord("a") - ord("Z") - 1 - - fdata.key += chr(char) - - if fdata.key and fdata.key not in self.data: - self.data[fdata.key] = { - "what": [content], - "file": [fdata.file_ref], - "path": fdata.ftype, - "line_no": fdata.ln, - } - - fdata.what = self.data[fdata.key]["what"] - - self.what_refs[content] = fdata.key - fdata.tag = new_tag - fdata.what_ln = fdata.ln - - if fdata.nametag["what"]: - t = (content, fdata.key) - if t not in fdata.nametag["symbols"]: - fdata.nametag["symbols"].append(t) - - return - - if fdata.tag and new_tag: - fdata.tag = new_tag - - if new_what: - fdata.label = "" - - if "description" in self.data[fdata.key]: - self.data[fdata.key]["description"] += "\n\n" - - if fdata.file_ref not in self.data[fdata.key]["file"]: - self.data[fdata.key]["file"].append(fdata.file_ref) - - if self.debug == AbiDebug.WHAT_PARSING: - self.log.debug("what: %s", fdata.what) - - if not fdata.what: - self.warn(fdata, "'What:' should come first:", line) - return - - if new_tag == "description": - fdata.space = None - - if content: - sep = sep.replace(":", " ") - - c = " " * len(new_tag) + sep + content - c = c.expandtabs() - - match = self.re_start_spc.match(c) - if match: - # Preserve initial spaces for the first line - fdata.space = match.group(1) - content = match.group(2) + "\n" - - self.data[fdata.key][fdata.tag] = content - - return - - # Store any contents before tags at the database - if not fdata.tag and "what" in fdata.nametag: - fdata.nametag["description"] += line - return - - if fdata.tag == "description": - content = line.expandtabs() - - if self.re_whitespace.sub("", content) == "": - self.data[fdata.key][fdata.tag] += "\n" - return - - if fdata.space is None: - match = self.re_start_spc.match(content) - if match: - # Preserve initial spaces for the first line - fdata.space = match.group(1) - - content = match.group(2) + "\n" - else: - if content.startswith(fdata.space): - content = content[len(fdata.space):] - - else: - fdata.space = "" - - if fdata.tag == "what": - w = content.strip("\n") - if w: - self.data[fdata.key][fdata.tag].append(w) - else: - self.data[fdata.key][fdata.tag] += content - return - - content = line.strip() - if fdata.tag: - if fdata.tag == "what": - w = content.strip("\n") - if w: - self.data[fdata.key][fdata.tag].append(w) - else: - self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n") - return - - # Everything else is error - if content: - self.warn(fdata, "Unexpected content", line) - - def parse_readme(self, nametag, fname): - """Parse ABI README file""" - - nametag["what"] = ["Introduction"] - nametag["path"] = "README" - with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: - for line in fp: - match = self.re_tag.match(line) - if match: - new = match.group(1).lower() - - match = self.re_valid.search(new) - if match: - nametag["description"] += "\n:" + line - continue - - nametag["description"] += line - - def parse_file(self, fname, path, basename): - """Parse a single file""" - - ref = f"abi_file_{path}_{basename}" - ref = self.re_unprintable.sub("_", ref).strip("_") - - # Store per-file state into a namespace variable. This will be used - # by the per-line parser state machine and by the warning function. - fdata = Namespace - - fdata.fname = fname - fdata.name = basename - - pos = fname.find(ABI_DIR) - if pos > 0: - f = fname[pos:] - else: - f = fname - - fdata.file_ref = (f, ref) - self.file_refs[f] = ref - - fdata.ln = 0 - fdata.what_ln = 0 - fdata.tag = "" - fdata.label = "" - fdata.what = [] - fdata.key = None - fdata.xrefs = None - fdata.space = None - fdata.ftype = path.split("/")[0] - - fdata.nametag = {} - fdata.nametag["what"] = [f"ABI file {path}/{basename}"] - fdata.nametag["type"] = "File" - fdata.nametag["path"] = fdata.ftype - fdata.nametag["file"] = [fdata.file_ref] - fdata.nametag["line_no"] = 1 - fdata.nametag["description"] = "" - fdata.nametag["symbols"] = [] - - self.data[ref] = fdata.nametag - - if self.debug & AbiDebug.WHAT_OPEN: - self.log.debug("Opening file %s", fname) - - if basename == "README": - self.parse_readme(fdata.nametag, fname) - return - - with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: - for line in fp: - fdata.ln += 1 - - self._parse_line(fdata, line) - - if "description" in fdata.nametag: - fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n") - - if fdata.key: - if "description" not in self.data.get(fdata.key, {}): - self.warn(fdata, f"{fdata.key} doesn't have a description") - - for w in fdata.what: - self.add_symbol(what=w, fname=fname, xref=fdata.key) - - def _parse_abi(self, root=None): - """Internal function to parse documentation ABI recursively""" - - if not root: - root = self.directory - - with os.scandir(root) as obj: - for entry in obj: - name = os.path.join(root, entry.name) - - if entry.is_dir(): - self._parse_abi(name) - continue - - if not entry.is_file(): - continue - - basename = os.path.basename(name) - - if basename.startswith("."): - continue - - if basename.endswith(self.ignore_suffixes): - continue - - path = self.re_abi_dir.sub("", os.path.dirname(name)) - - self.parse_file(name, path, basename) - - def parse_abi(self, root=None): - """Parse documentation ABI""" - - self._parse_abi(root) - - if self.debug & AbiDebug.DUMP_ABI_STRUCTS: - self.log.debug(pformat(self.data)) - - def desc_txt(self, desc): - """Print description as found inside ABI files""" - - desc = desc.strip(" \t\n") - - return desc + "\n\n" - - def xref(self, fname): - """ - Converts a Documentation/ABI + basename into a ReST cross-reference - """ - - xref = self.file_refs.get(fname) - if not xref: - return None - else: - return xref - - def desc_rst(self, desc): - """Enrich ReST output by creating cross-references""" - - # Remove title markups from the description - # Having titles inside ABI files will only work if extra - # care would be taken in order to strictly follow the same - # level order for each markup. - desc = self.re_title_mark.sub("\n\n", "\n" + desc) - desc = desc.rstrip(" \t\n").lstrip("\n") - - # Python's regex performance for non-compiled expressions is a lot - # than Perl, as Perl automatically caches them at their - # first usage. Here, we'll need to do the same, as otherwise the - # performance penalty is be high - - new_desc = "" - for d in desc.split("\n"): - if d == "": - new_desc += "\n" - continue - - # Use cross-references for doc files where needed - d = self.re_doc.sub(r":doc:`/\1`", d) - - # Use cross-references for ABI generated docs where needed - matches = self.re_abi.findall(d) - for m in matches: - abi = m[0] + m[1] - - xref = self.file_refs.get(abi) - if not xref: - # This may happen if ABI is on a separate directory, - # like parsing ABI testing and symbol is at stable. - # The proper solution is to move this part of the code - # for it to be inside sphinx/kernel_abi.py - self.log.info("Didn't find ABI reference for '%s'", abi) - else: - new = self.re_escape.sub(r"\\\1", m[1]) - d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d) - - # Seek for cross reference symbols like /sys/... - # Need to be careful to avoid doing it on a code block - if d[0] not in [" ", "\t"]: - matches = self.re_xref_node.findall(d) - for m in matches: - # Finding ABI here is more complex due to wildcards - xref = self.what_refs.get(m) - if xref: - new = self.re_escape.sub(r"\\\1", m) - d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d) - - new_desc += d + "\n" - - return new_desc + "\n\n" - - def doc(self, output_in_txt=False, show_symbols=True, show_file=True, - filter_path=None): - """Print ABI at stdout""" - - part = None - for key, v in sorted(self.data.items(), - key=lambda x: (x[1].get("type", ""), - x[1].get("what"))): - - wtype = v.get("type", "Symbol") - file_ref = v.get("file") - names = v.get("what", [""]) - - if wtype == "File": - if not show_file: - continue - else: - if not show_symbols: - continue - - if filter_path: - if v.get("path") != filter_path: - continue - - msg = "" - - if wtype != "File": - cur_part = names[0] - if cur_part.find("/") >= 0: - match = self.re_what.match(cur_part) - if match: - symbol = match.group(1).rstrip("/") - cur_part = "Symbols under " + symbol - - if cur_part and cur_part != part: - part = cur_part - msg += part + "\n"+ "-" * len(part) +"\n\n" - - msg += f".. _{key}:\n\n" - - max_len = 0 - for i in range(0, len(names)): # pylint: disable=C0200 - names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**" - - max_len = max(max_len, len(names[i])) - - msg += "+-" + "-" * max_len + "-+\n" - for name in names: - msg += f"| {name}" + " " * (max_len - len(name)) + " |\n" - msg += "+-" + "-" * max_len + "-+\n" - msg += "\n" - - for ref in file_ref: - if wtype == "File": - msg += f".. _{ref[1]}:\n\n" - else: - base = os.path.basename(ref[0]) - msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n" - - if wtype == "File": - msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n" - - desc = v.get("description") - if not desc and wtype != "File": - msg += f"DESCRIPTION MISSING for {names[0]}\n\n" - - if desc: - if output_in_txt: - msg += self.desc_txt(desc) - else: - msg += self.desc_rst(desc) - - symbols = v.get("symbols") - if symbols: - msg += "Has the following ABI:\n\n" - - for w, label in symbols: - # Escape special chars from content - content = self.re_escape.sub(r"\\\1", w) - - msg += f"- :ref:`{content} <{label}>`\n\n" - - users = v.get("users") - if users and users.strip(" \t\n"): - users = users.strip("\n").replace('\n', '\n\t') - msg += f"Users:\n\t{users}\n\n" - - ln = v.get("line_no", 1) - - yield (msg, file_ref[0][0], ln) - - def check_issues(self): - """Warn about duplicated ABI entries""" - - for what, v in self.what_symbols.items(): - files = v.get("file") - if not files: - # Should never happen if the parser works properly - self.log.warning("%s doesn't have a file associated", what) - continue - - if len(files) == 1: - continue - - f = [] - for fname, lines in sorted(files.items()): - if not lines: - f.append(f"{fname}") - elif len(lines) == 1: - f.append(f"{fname}:{lines[0]}") - else: - m = fname + "lines " - m += ", ".join(str(x) for x in lines) - f.append(m) - - self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f)) - - def search_symbols(self, expr): - """ Searches for ABI symbols """ - - regex = re.compile(expr, re.I) - - found_keys = 0 - for t in sorted(self.data.items(), key=lambda x: [0]): - v = t[1] - - wtype = v.get("type", "") - if wtype == "File": - continue - - for what in v.get("what", [""]): - if regex.search(what): - found_keys += 1 - - kernelversion = v.get("kernelversion", "").strip(" \t\n") - date = v.get("date", "").strip(" \t\n") - contact = v.get("contact", "").strip(" \t\n") - users = v.get("users", "").strip(" \t\n") - desc = v.get("description", "").strip(" \t\n") - - files = [] - for f in v.get("file", ()): - files.append(f[0]) - - what = str(found_keys) + ". " + what - title_tag = "-" * len(what) - - print(f"\n{what}\n{title_tag}\n") - - if kernelversion: - print(f"Kernel version:\t\t{kernelversion}") - - if date: - print(f"Date:\t\t\t{date}") - - if contact: - print(f"Contact:\t\t{contact}") - - if users: - print(f"Users:\t\t\t{users}") - - print("Defined on file(s):\t" + ", ".join(files)) - - if desc: - desc = desc.strip("\n") - print(f"\n{desc}\n") - - if not found_keys: - print(f"Regular expression /{expr}/ not found.") diff --git a/scripts/lib/abi/abi_regex.py b/scripts/lib/abi/abi_regex.py deleted file mode 100644 index 8a57846cbc69..000000000000 --- a/scripts/lib/abi/abi_regex.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python3 -# xxpylint: disable=R0903 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Convert ABI what into regular expressions -""" - -import re -import sys - -from pprint import pformat - -from abi_parser import AbiParser -from helpers import AbiDebug - -class AbiRegex(AbiParser): - """Extends AbiParser to search ABI nodes with regular expressions""" - - # Escape only ASCII visible characters - escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])" - leave_others = "others" - - # Tuples with regular expressions to be compiled and replacement data - re_whats = [ - # Drop escape characters that might exist - (re.compile("\\\\"), ""), - - # Temporarily escape dot characters - (re.compile(r"\."), "\xf6"), - - # Temporarily change [0-9]+ type of patterns - (re.compile(r"\[0\-9\]\+"), "\xff"), - - # Temporarily change [\d+-\d+] type of patterns - (re.compile(r"\[0\-\d+\]"), "\xff"), - (re.compile(r"\[0:\d+\]"), "\xff"), - (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"), - - # Temporarily change [0-9] type of patterns - (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"), - - # Handle multiple option patterns - (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"), - - # Handle wildcards - (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"), - (re.compile(r"/\*/"), "/.*/"), - (re.compile(r"/\xf6\xf6\xf6"), "/.*"), - (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"), - (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"), - (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"), - - (re.compile(r"XX+"), "\\\\w\xf7"), - (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"), - (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"), - (re.compile(r"_[AB]_"), "_\\\\w\xf7_"), - - # Recover [0-9] type of patterns - (re.compile(r"\xf4"), "["), - (re.compile(r"\xf5"), "]"), - - # Remove duplicated spaces - (re.compile(r"\s+"), r" "), - - # Special case: drop comparison as in: - # What: foo = - # (this happens on a few IIO definitions) - (re.compile(r"\s*\=.*$"), ""), - - # Escape all other symbols - (re.compile(escape_symbols), r"\\\1"), - (re.compile(r"\\\\"), r"\\"), - (re.compile(r"\\([\[\]\(\)\|])"), r"\1"), - (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"), - - (re.compile(r"\xff"), r"\\d+"), - - # Special case: IIO ABI which a parenthesis. - (re.compile(r"sqrt(.*)"), r"sqrt(.*)"), - - # Simplify regexes with multiple .* - (re.compile(r"(?:\.\*){2,}"), ""), - - # Recover dot characters - (re.compile(r"\xf6"), "\\."), - # Recover plus characters - (re.compile(r"\xf7"), "+"), - ] - re_has_num = re.compile(r"\\d") - - # Symbol name after escape_chars that are considered a devnode basename - re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$") - - # List of popular group names to be skipped to minimize regex group size - # Use AbiDebug.SUBGROUP_SIZE to detect those - skip_names = set(["devices", "hwmon"]) - - def regex_append(self, what, new): - """ - Get a search group for a subset of regular expressions. - - As ABI may have thousands of symbols, using a for to search all - regular expressions is at least O(n^2). When there are wildcards, - the complexity increases substantially, eventually becoming exponential. - - To avoid spending too much time on them, use a logic to split - them into groups. The smaller the group, the better, as it would - mean that searches will be confined to a small number of regular - expressions. - - The conversion to a regex subset is tricky, as we need something - that can be easily obtained from the sysfs symbol and from the - regular expression. So, we need to discard nodes that have - wildcards. - - If it can't obtain a subgroup, place the regular expression inside - a special group (self.leave_others). - """ - - search_group = None - - for search_group in reversed(new.split("/")): - if not search_group or search_group in self.skip_names: - continue - if self.re_symbol_name.match(search_group): - break - - if not search_group: - search_group = self.leave_others - - if self.debug & AbiDebug.SUBGROUP_MAP: - self.log.debug("%s: mapped as %s", what, search_group) - - try: - if search_group not in self.regex_group: - self.regex_group[search_group] = [] - - self.regex_group[search_group].append(re.compile(new)) - if self.search_string: - if what.find(self.search_string) >= 0: - print(f"What: {what}") - except re.PatternError: - self.log.warning("Ignoring '%s' as it produced an invalid regex:\n" - " '%s'", what, new) - - def get_regexes(self, what): - """ - Given an ABI devnode, return a list of all regular expressions that - may match it, based on the sub-groups created by regex_append() - """ - - re_list = [] - - patches = what.split("/") - patches.reverse() - patches.append(self.leave_others) - - for search_group in patches: - if search_group in self.regex_group: - re_list += self.regex_group[search_group] - - return re_list - - def __init__(self, *args, **kwargs): - """ - Override init method to get verbose argument - """ - - self.regex_group = None - self.search_string = None - self.re_string = None - - if "search_string" in kwargs: - self.search_string = kwargs.get("search_string") - del kwargs["search_string"] - - if self.search_string: - - try: - self.re_string = re.compile(self.search_string) - except re.PatternError as e: - msg = f"{self.search_string} is not a valid regular expression" - raise ValueError(msg) from e - - super().__init__(*args, **kwargs) - - def parse_abi(self, *args, **kwargs): - - super().parse_abi(*args, **kwargs) - - self.regex_group = {} - - print("Converting ABI What fields into regexes...", file=sys.stderr) - - for t in sorted(self.data.items(), key=lambda x: x[0]): - v = t[1] - if v.get("type") == "File": - continue - - v["regex"] = [] - - for what in v.get("what", []): - if not what.startswith("/sys"): - continue - - new = what - for r, s in self.re_whats: - try: - new = r.sub(s, new) - except re.PatternError as e: - # Help debugging troubles with new regexes - raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e - - v["regex"].append(new) - - if self.debug & AbiDebug.REGEX: - self.log.debug("%-90s <== %s", new, what) - - # Store regex into a subgroup to speedup searches - self.regex_append(what, new) - - if self.debug & AbiDebug.SUBGROUP_DICT: - self.log.debug("%s", pformat(self.regex_group)) - - if self.debug & AbiDebug.SUBGROUP_SIZE: - biggestd_keys = sorted(self.regex_group.keys(), - key= lambda k: len(self.regex_group[k]), - reverse=True) - - print("Top regex subgroups:", file=sys.stderr) - for k in biggestd_keys[:10]: - print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr) diff --git a/scripts/lib/abi/helpers.py b/scripts/lib/abi/helpers.py deleted file mode 100644 index 639b23e4ca33..000000000000 --- a/scripts/lib/abi/helpers.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# pylint: disable=R0903 -# SPDX-License-Identifier: GPL-2.0 - -""" -Helper classes for ABI parser -""" - -ABI_DIR = "Documentation/ABI/" - - -class AbiDebug: - """Debug levels""" - - WHAT_PARSING = 1 - WHAT_OPEN = 2 - DUMP_ABI_STRUCTS = 4 - UNDEFINED = 8 - REGEX = 16 - SUBGROUP_MAP = 32 - SUBGROUP_DICT = 64 - SUBGROUP_SIZE = 128 - GRAPH = 256 - - -DEBUG_HELP = """ -1 - enable debug parsing logic -2 - enable debug messages on file open -4 - enable debug for ABI parse data -8 - enable extra debug information to identify troubles - with ABI symbols found at the local machine that - weren't found on ABI documentation (used only for - undefined subcommand) -16 - enable debug for what to regex conversion -32 - enable debug for symbol regex subgroups -64 - enable debug for sysfs graph tree variable -""" diff --git a/scripts/lib/abi/system_symbols.py b/scripts/lib/abi/system_symbols.py deleted file mode 100644 index f15c94a6e33c..000000000000 --- a/scripts/lib/abi/system_symbols.py +++ /dev/null @@ -1,378 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=R0902,R0912,R0914,R0915,R1702 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Parse ABI documentation and produce results from it. -""" - -import os -import re -import sys - -from concurrent import futures -from datetime import datetime -from random import shuffle - -from helpers import AbiDebug - -class SystemSymbols: - """Stores arguments for the class and initialize class vars""" - - def graph_add_file(self, path, link=None): - """ - add a file path to the sysfs graph stored at self.root - """ - - if path in self.files: - return - - name = "" - ref = self.root - for edge in path.split("/"): - name += edge + "/" - if edge not in ref: - ref[edge] = {"__name": [name.rstrip("/")]} - - ref = ref[edge] - - if link and link not in ref["__name"]: - ref["__name"].append(link.rstrip("/")) - - self.files.add(path) - - def print_graph(self, root_prefix="", root=None, level=0): - """Prints a reference tree graph using UTF-8 characters""" - - if not root: - root = self.root - level = 0 - - # Prevent endless traverse - if level > 5: - return - - if level > 0: - prefix = "├──" - last_prefix = "└──" - else: - prefix = "" - last_prefix = "" - - items = list(root.items()) - - names = root.get("__name", []) - for k, edge in items: - if k == "__name": - continue - - if not k: - k = "/" - - if len(names) > 1: - k += " links: " + ",".join(names[1:]) - - if edge == items[-1][1]: - print(root_prefix + last_prefix + k) - p = root_prefix - if level > 0: - p += " " - self.print_graph(p, edge, level + 1) - else: - print(root_prefix + prefix + k) - p = root_prefix + "│ " - self.print_graph(p, edge, level + 1) - - def _walk(self, root): - """ - Walk through sysfs to get all devnodes that aren't ignored. - - By default, uses /sys as sysfs mounting point. If another - directory is used, it replaces them to /sys at the patches. - """ - - with os.scandir(root) as obj: - for entry in obj: - path = os.path.join(root, entry.name) - if self.sysfs: - p = path.replace(self.sysfs, "/sys", count=1) - else: - p = path - - if self.re_ignore.search(p): - return - - # Handle link first to avoid directory recursion - if entry.is_symlink(): - real = os.path.realpath(path) - if not self.sysfs: - self.aliases[path] = real - else: - real = real.replace(self.sysfs, "/sys", count=1) - - # Add absfile location to graph if it doesn't exist - if not self.re_ignore.search(real): - # Add link to the graph - self.graph_add_file(real, p) - - elif entry.is_file(): - self.graph_add_file(p) - - elif entry.is_dir(): - self._walk(path) - - def __init__(self, abi, sysfs="/sys", hints=False): - """ - Initialize internal variables and get a list of all files inside - sysfs that can currently be parsed. - - Please notice that there are several entries on sysfs that aren't - documented as ABI. Ignore those. - - The real paths will be stored under self.files. Aliases will be - stored in separate, as self.aliases. - """ - - self.abi = abi - self.log = abi.log - - if sysfs != "/sys": - self.sysfs = sysfs.rstrip("/") - else: - self.sysfs = None - - self.hints = hints - - self.root = {} - self.aliases = {} - self.files = set() - - dont_walk = [ - # Those require root access and aren't documented at ABI - f"^{sysfs}/kernel/debug", - f"^{sysfs}/kernel/tracing", - f"^{sysfs}/fs/pstore", - f"^{sysfs}/fs/bpf", - f"^{sysfs}/fs/fuse", - - # This is not documented at ABI - f"^{sysfs}/module", - - f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI - f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings - "sections|notes", # aren't actually part of ABI - - # kernel-parameters.txt - not easy to parse - "parameters", - ] - - self.re_ignore = re.compile("|".join(dont_walk)) - - print(f"Reading {sysfs} directory contents...", file=sys.stderr) - self._walk(sysfs) - - def check_file(self, refs, found): - """Check missing ABI symbols for a given sysfs file""" - - res_list = [] - - try: - for names in refs: - fname = names[0] - - res = { - "found": False, - "fname": fname, - "msg": "", - } - res_list.append(res) - - re_what = self.abi.get_regexes(fname) - if not re_what: - self.abi.log.warning(f"missing rules for {fname}") - continue - - for name in names: - for r in re_what: - if self.abi.debug & AbiDebug.UNDEFINED: - self.log.debug("check if %s matches '%s'", name, r.pattern) - if r.match(name): - res["found"] = True - if found: - res["msg"] += f" {fname}: regex:\n\t" - continue - - if self.hints and not res["found"]: - res["msg"] += f" {fname} not found. Tested regexes:\n" - for r in re_what: - res["msg"] += " " + r.pattern + "\n" - - except KeyboardInterrupt: - pass - - return res_list - - def _ref_interactor(self, root): - """Recursive function to interact over the sysfs tree""" - - for k, v in root.items(): - if isinstance(v, dict): - yield from self._ref_interactor(v) - - if root == self.root or k == "__name": - continue - - if self.abi.re_string: - fname = v["__name"][0] - if self.abi.re_string.search(fname): - yield v - else: - yield v - - - def get_fileref(self, all_refs, chunk_size): - """Interactor to group refs into chunks""" - - n = 0 - refs = [] - - for ref in all_refs: - refs.append(ref) - - n += 1 - if n >= chunk_size: - yield refs - n = 0 - refs = [] - - yield refs - - def check_undefined_symbols(self, max_workers=None, chunk_size=50, - found=None, dry_run=None): - """Seach ABI for sysfs symbols missing documentation""" - - self.abi.parse_abi() - - if self.abi.debug & AbiDebug.GRAPH: - self.print_graph() - - all_refs = [] - for ref in self._ref_interactor(self.root): - all_refs.append(ref["__name"]) - - if dry_run: - print("Would check", file=sys.stderr) - for ref in all_refs: - print(", ".join(ref)) - - return - - print("Starting to search symbols (it may take several minutes):", - file=sys.stderr) - start = datetime.now() - old_elapsed = None - - # Python doesn't support multithreading due to limitations on its - # global lock (GIL). While Python 3.13 finally made GIL optional, - # there are still issues related to it. Also, we want to have - # backward compatibility with older versions of Python. - # - # So, use instead multiprocess. However, Python is very slow passing - # data from/to multiple processes. Also, it may consume lots of memory - # if the data to be shared is not small. So, we need to group workload - # in chunks that are big enough to generate performance gains while - # not being so big that would cause out-of-memory. - - num_refs = len(all_refs) - print(f"Number of references to parse: {num_refs}", file=sys.stderr) - - if not max_workers: - max_workers = os.cpu_count() - elif max_workers > os.cpu_count(): - max_workers = os.cpu_count() - - max_workers = max(max_workers, 1) - - max_chunk_size = int((num_refs + max_workers - 1) / max_workers) - chunk_size = min(chunk_size, max_chunk_size) - chunk_size = max(1, chunk_size) - - if max_workers > 1: - executor = futures.ProcessPoolExecutor - - # Place references in a random order. This may help improving - # performance, by mixing complex/simple expressions when creating - # chunks - shuffle(all_refs) - else: - # Python has a high overhead with processes. When there's just - # one worker, it is faster to not create a new process. - # Yet, User still deserves to have a progress print. So, use - # python's "thread", which is actually a single process, using - # an internal schedule to switch between tasks. No performance - # gains for non-IO tasks, but still it can be quickly interrupted - # from time to time to display progress. - executor = futures.ThreadPoolExecutor - - not_found = [] - f_list = [] - with executor(max_workers=max_workers) as exe: - for refs in self.get_fileref(all_refs, chunk_size): - if refs: - try: - f_list.append(exe.submit(self.check_file, refs, found)) - - except KeyboardInterrupt: - return - - total = len(f_list) - - if not total: - if self.abi.re_string: - print(f"No ABI symbol matches {self.abi.search_string}") - else: - self.abi.log.warning("No ABI symbols found") - return - - print(f"{len(f_list):6d} jobs queued on {max_workers} workers", - file=sys.stderr) - - while f_list: - try: - t = futures.wait(f_list, timeout=1, - return_when=futures.FIRST_COMPLETED) - - done = t[0] - - for fut in done: - res_list = fut.result() - - for res in res_list: - if not res["found"]: - not_found.append(res["fname"]) - if res["msg"]: - print(res["msg"]) - - f_list.remove(fut) - except KeyboardInterrupt: - return - - except RuntimeError as e: - self.abi.log.warning(f"Future: {e}") - break - - if sys.stderr.isatty(): - elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] - if len(f_list) < total: - elapsed += f" ({total - len(f_list)}/{total} jobs completed). " - if elapsed != old_elapsed: - print(elapsed + "\r", end="", flush=True, - file=sys.stderr) - old_elapsed = elapsed - - elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] - print(elapsed, file=sys.stderr) - - for f in sorted(not_found): - print(f"{f} not found.") diff --git a/scripts/lib/jobserver.py b/scripts/lib/jobserver.py deleted file mode 100755 index a24f30ef4fa8..000000000000 --- a/scripts/lib/jobserver.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0+ -# -# pylint: disable=C0103,C0209 -# -# - -""" -Interacts with the POSIX jobserver during the Kernel build time. - -A "normal" jobserver task, like the one initiated by a make subrocess would do: - - - open read/write file descriptors to communicate with the job server; - - ask for one slot by calling: - claim = os.read(reader, 1) - - when the job finshes, call: - os.write(writer, b"+") # os.write(writer, claim) - -Here, the goal is different: This script aims to get the remaining number -of slots available, using all of them to run a command which handle tasks in -parallel. To to that, it has a loop that ends only after there are no -slots left. It then increments the number by one, in order to allow a -call equivalent to make -j$((claim+1)), e.g. having a parent make creating -$claim child to do the actual work. - -The end goal here is to keep the total number of build tasks under the -limit established by the initial make -j$n_proc call. - -See: - https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -""" - -import errno -import os -import subprocess -import sys - -class JobserverExec: - """ - Claim all slots from make using POSIX Jobserver. - - The main methods here are: - - open(): reserves all slots; - - close(): method returns all used slots back to make; - - run(): executes a command setting PARALLELISM= - """ - - def __init__(self): - """Initialize internal vars""" - self.claim = 0 - self.jobs = b"" - self.reader = None - self.writer = None - self.is_open = False - - def open(self): - """Reserve all available slots to be claimed later on""" - - if self.is_open: - return - - try: - # Fetch the make environment options. - flags = os.environ["MAKEFLAGS"] - # Look for "--jobserver=R,W" - # Note that GNU Make has used --jobserver-fds and --jobserver-auth - # so this handles all of them. - opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] - - # Parse out R,W file descriptor numbers and set them nonblocking. - # If the MAKEFLAGS variable contains multiple instances of the - # --jobserver-auth= option, the last one is relevant. - fds = opts[-1].split("=", 1)[1] - - # Starting with GNU Make 4.4, named pipes are used for reader - # and writer. - # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 - _, _, path = fds.partition("fifo:") - - if path: - self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) - self.writer = os.open(path, os.O_WRONLY) - else: - self.reader, self.writer = [int(x) for x in fds.split(",", 1)] - # Open a private copy of reader to avoid setting nonblocking - # on an unexpecting process with the same reader fd. - self.reader = os.open("/proc/self/fd/%d" % (self.reader), - os.O_RDONLY | os.O_NONBLOCK) - - # Read out as many jobserver slots as possible - while True: - try: - slot = os.read(self.reader, 8) - self.jobs += slot - except (OSError, IOError) as e: - if e.errno == errno.EWOULDBLOCK: - # Stop at the end of the jobserver queue. - break - # If something went wrong, give back the jobs. - if self.jobs: - os.write(self.writer, self.jobs) - raise e - - # Add a bump for our caller's reserveration, since we're just going - # to sit here blocked on our child. - self.claim = len(self.jobs) + 1 - - except (KeyError, IndexError, ValueError, OSError, IOError): - # Any missing environment strings or bad fds should result in just - # not being parallel. - self.claim = None - - self.is_open = True - - def close(self): - """Return all reserved slots to Jobserver""" - - if not self.is_open: - return - - # Return all the reserved slots. - if len(self.jobs): - os.write(self.writer, self.jobs) - - self.is_open = False - - def __enter__(self): - self.open() - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - self.close() - - def run(self, cmd, *args, **pwargs): - """ - Run a command setting PARALLELISM env variable to the number of - available job slots (claim) + 1, e.g. it will reserve claim slots - to do the actual build work, plus one to monitor its children. - """ - self.open() # Ensure that self.claim is set - - # We can only claim parallelism if there was a jobserver (i.e. a - # top-level "-jN" argument) and there were no other failures. Otherwise - # leave out the environment variable and let the child figure out what - # is best. - if self.claim: - os.environ["PARALLELISM"] = str(self.claim) - - return subprocess.call(cmd, *args, **pwargs) diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py deleted file mode 100644 index 1fd8d17edb32..000000000000 --- a/scripts/lib/kdoc/kdoc_files.py +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=R0903,R0913,R0914,R0917 - -""" -Parse lernel-doc tags on multiple kernel source files. -""" - -import argparse -import logging -import os -import re - -from kdoc_parser import KernelDoc -from kdoc_output import OutputFormat - - -class GlobSourceFiles: - """ - Parse C source code file names and directories via an Interactor. - """ - - def __init__(self, srctree=None, valid_extensions=None): - """ - Initialize valid extensions with a tuple. - - If not defined, assume default C extensions (.c and .h) - - It would be possible to use python's glob function, but it is - very slow, and it is not interactive. So, it would wait to read all - directories before actually do something. - - So, let's use our own implementation. - """ - - if not valid_extensions: - self.extensions = (".c", ".h") - else: - self.extensions = valid_extensions - - self.srctree = srctree - - def _parse_dir(self, dirname): - """Internal function to parse files recursively""" - - with os.scandir(dirname) as obj: - for entry in obj: - name = os.path.join(dirname, entry.name) - - if entry.is_dir(follow_symlinks=False): - yield from self._parse_dir(name) - - if not entry.is_file(): - continue - - basename = os.path.basename(name) - - if not basename.endswith(self.extensions): - continue - - yield name - - def parse_files(self, file_list, file_not_found_cb): - """ - Define an interator to parse all source files from file_list, - handling directories if any - """ - - if not file_list: - return - - for fname in file_list: - if self.srctree: - f = os.path.join(self.srctree, fname) - else: - f = fname - - if os.path.isdir(f): - yield from self._parse_dir(f) - elif os.path.isfile(f): - yield f - elif file_not_found_cb: - file_not_found_cb(fname) - - -class KernelFiles(): - """ - Parse kernel-doc tags on multiple kernel source files. - - There are two type of parsers defined here: - - self.parse_file(): parses both kernel-doc markups and - EXPORT_SYMBOL* macros; - - self.process_export_file(): parses only EXPORT_SYMBOL* macros. - """ - - def warning(self, msg): - """Ancillary routine to output a warning and increment error count""" - - self.config.log.warning(msg) - self.errors += 1 - - def error(self, msg): - """Ancillary routine to output an error and increment error count""" - - self.config.log.error(msg) - self.errors += 1 - - def parse_file(self, fname): - """ - Parse a single Kernel source. - """ - - # Prevent parsing the same file twice if results are cached - if fname in self.files: - return - - doc = KernelDoc(self.config, fname) - export_table, entries = doc.parse_kdoc() - - self.export_table[fname] = export_table - - self.files.add(fname) - self.export_files.add(fname) # parse_kdoc() already check exports - - self.results[fname] = entries - - def process_export_file(self, fname): - """ - Parses EXPORT_SYMBOL* macros from a single Kernel source file. - """ - - # Prevent parsing the same file twice if results are cached - if fname in self.export_files: - return - - doc = KernelDoc(self.config, fname) - export_table = doc.parse_export() - - if not export_table: - self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") - export_table = set() - - self.export_table[fname] = export_table - self.export_files.add(fname) - - def file_not_found_cb(self, fname): - """ - Callback to warn if a file was not found. - """ - - self.error(f"Cannot find file {fname}") - - def __init__(self, verbose=False, out_style=None, - werror=False, wreturn=False, wshort_desc=False, - wcontents_before_sections=False, - logger=None): - """ - Initialize startup variables and parse all files - """ - - if not verbose: - verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) - - if out_style is None: - out_style = OutputFormat() - - if not werror: - kcflags = os.environ.get("KCFLAGS", None) - if kcflags: - match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) - if match: - werror = True - - # reading this variable is for backwards compat just in case - # someone was calling it with the variable from outside the - # kernel's build system - kdoc_werror = os.environ.get("KDOC_WERROR", None) - if kdoc_werror: - werror = kdoc_werror - - # Some variables are global to the parser logic as a whole as they are - # used to send control configuration to KernelDoc class. As such, - # those variables are read-only inside the KernelDoc. - self.config = argparse.Namespace - - self.config.verbose = verbose - self.config.werror = werror - self.config.wreturn = wreturn - self.config.wshort_desc = wshort_desc - self.config.wcontents_before_sections = wcontents_before_sections - - if not logger: - self.config.log = logging.getLogger("kernel-doc") - else: - self.config.log = logger - - self.config.warning = self.warning - - self.config.src_tree = os.environ.get("SRCTREE", None) - - # Initialize variables that are internal to KernelFiles - - self.out_style = out_style - - self.errors = 0 - self.results = {} - - self.files = set() - self.export_files = set() - self.export_table = {} - - def parse(self, file_list, export_file=None): - """ - Parse all files - """ - - glob = GlobSourceFiles(srctree=self.config.src_tree) - - for fname in glob.parse_files(file_list, self.file_not_found_cb): - self.parse_file(fname) - - for fname in glob.parse_files(export_file, self.file_not_found_cb): - self.process_export_file(fname) - - def out_msg(self, fname, name, arg): - """ - Return output messages from a file name using the output style - filtering. - - If output type was not handled by the syler, return None. - """ - - # NOTE: we can add rules here to filter out unwanted parts, - # although OutputFormat.msg already does that. - - return self.out_style.msg(fname, name, arg) - - def msg(self, enable_lineno=False, export=False, internal=False, - symbol=None, nosymbol=None, no_doc_sections=False, - filenames=None, export_file=None): - """ - Interacts over the kernel-doc results and output messages, - returning kernel-doc markups on each interaction - """ - - self.out_style.set_config(self.config) - - if not filenames: - filenames = sorted(self.results.keys()) - - glob = GlobSourceFiles(srctree=self.config.src_tree) - - for fname in filenames: - function_table = set() - - if internal or export: - if not export_file: - export_file = [fname] - - for f in glob.parse_files(export_file, self.file_not_found_cb): - function_table |= self.export_table[f] - - if symbol: - for s in symbol: - function_table.add(s) - - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - - msg = "" - if fname not in self.results: - self.config.log.warning("No kernel-doc for file %s", fname) - continue - - symbols = self.results[fname] - self.out_style.set_symbols(symbols) - - for arg in symbols: - m = self.out_msg(fname, arg.name, arg) - - if m is None: - ln = arg.get("ln", 0) - dtype = arg.get('type', "") - - self.config.log.warning("%s:%d Can't handle %s", - fname, ln, dtype) - else: - msg += m - - if msg: - yield fname, msg diff --git a/scripts/lib/kdoc/kdoc_item.py b/scripts/lib/kdoc/kdoc_item.py deleted file mode 100644 index 19805301cb2c..000000000000 --- a/scripts/lib/kdoc/kdoc_item.py +++ /dev/null @@ -1,43 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# A class that will, eventually, encapsulate all of the parsed data that we -# then pass into the output modules. -# - -class KdocItem: - def __init__(self, name, fname, type, start_line, **other_stuff): - self.name = name - self.fname = fname - self.type = type - self.declaration_start_line = start_line - self.sections = {} - self.sections_start_lines = {} - self.parameterlist = [] - self.parameterdesc_start_lines = [] - self.parameterdescs = {} - self.parametertypes = {} - # - # Just save everything else into our own dict so that the output - # side can grab it directly as before. As we move things into more - # structured data, this will, hopefully, fade away. - # - self.other_stuff = other_stuff - - def get(self, key, default = None): - return self.other_stuff.get(key, default) - - def __getitem__(self, key): - return self.get(key) - - # - # Tracking of section and parameter information. - # - def set_sections(self, sections, start_lines): - self.sections = sections - self.section_start_lines = start_lines - - def set_params(self, names, descs, types, starts): - self.parameterlist = names - self.parameterdescs = descs - self.parametertypes = types - self.parameterdesc_start_lines = starts diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py deleted file mode 100644 index 58f115059e93..000000000000 --- a/scripts/lib/kdoc/kdoc_output.py +++ /dev/null @@ -1,824 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 - -""" -Implement output filters to print kernel-doc documentation. - -The implementation uses a virtual base class (OutputFormat) which -contains a dispatches to virtual methods, and some code to filter -out output messages. - -The actual implementation is done on one separate class per each type -of output. Currently, there are output classes for ReST and man/troff. -""" - -import os -import re -from datetime import datetime - -from kdoc_parser import KernelDoc, type_param -from kdoc_re import KernRe - - -function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) - -# match expressions used to find embedded type information -type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) -type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) -type_func = KernRe(r"(\w+)\(\)", cache=False) -type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) - -# Special RST handling for func ptr params -type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) - -# Special RST handling for structs with func ptr params -type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) - -type_env = KernRe(r"(\$\w+)", cache=False) -type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) -type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) -type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) -type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False) -type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) -type_fallback = KernRe(r"\&([_\w]+)", cache=False) -type_member_func = type_member + KernRe(r"\(\)", cache=False) - - -class OutputFormat: - """ - Base class for OutputFormat. If used as-is, it means that only - warnings will be displayed. - """ - - # output mode. - OUTPUT_ALL = 0 # output all symbols and doc sections - OUTPUT_INCLUDE = 1 # output only specified symbols - OUTPUT_EXPORTED = 2 # output exported symbols - OUTPUT_INTERNAL = 3 # output non-exported symbols - - # Virtual member to be overriden at the inherited classes - highlights = [] - - def __init__(self): - """Declare internal vars and set mode to OUTPUT_ALL""" - - self.out_mode = self.OUTPUT_ALL - self.enable_lineno = None - self.nosymbol = {} - self.symbol = None - self.function_table = None - self.config = None - self.no_doc_sections = False - - self.data = "" - - def set_config(self, config): - """ - Setup global config variables used by both parser and output. - """ - - self.config = config - - def set_filter(self, export, internal, symbol, nosymbol, function_table, - enable_lineno, no_doc_sections): - """ - Initialize filter variables according with the requested mode. - - Only one choice is valid between export, internal and symbol. - - The nosymbol filter can be used on all modes. - """ - - self.enable_lineno = enable_lineno - self.no_doc_sections = no_doc_sections - self.function_table = function_table - - if symbol: - self.out_mode = self.OUTPUT_INCLUDE - elif export: - self.out_mode = self.OUTPUT_EXPORTED - elif internal: - self.out_mode = self.OUTPUT_INTERNAL - else: - self.out_mode = self.OUTPUT_ALL - - if nosymbol: - self.nosymbol = set(nosymbol) - - - def highlight_block(self, block): - """ - Apply the RST highlights to a sub-block of text. - """ - - for r, sub in self.highlights: - block = r.sub(sub, block) - - return block - - def out_warnings(self, args): - """ - Output warnings for identifiers that will be displayed. - """ - - for log_msg in args.warnings: - self.config.warning(log_msg) - - def check_doc(self, name, args): - """Check if DOC should be output""" - - if self.no_doc_sections: - return False - - if name in self.nosymbol: - return False - - if self.out_mode == self.OUTPUT_ALL: - self.out_warnings(args) - return True - - if self.out_mode == self.OUTPUT_INCLUDE: - if name in self.function_table: - self.out_warnings(args) - return True - - return False - - def check_declaration(self, dtype, name, args): - """ - Checks if a declaration should be output or not based on the - filtering criteria. - """ - - if name in self.nosymbol: - return False - - if self.out_mode == self.OUTPUT_ALL: - self.out_warnings(args) - return True - - if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: - if name in self.function_table: - return True - - if self.out_mode == self.OUTPUT_INTERNAL: - if dtype != "function": - self.out_warnings(args) - return True - - if name not in self.function_table: - self.out_warnings(args) - return True - - return False - - def msg(self, fname, name, args): - """ - Handles a single entry from kernel-doc parser - """ - - self.data = "" - - dtype = args.type - - if dtype == "doc": - self.out_doc(fname, name, args) - return self.data - - if not self.check_declaration(dtype, name, args): - return self.data - - if dtype == "function": - self.out_function(fname, name, args) - return self.data - - if dtype == "enum": - self.out_enum(fname, name, args) - return self.data - - if dtype == "typedef": - self.out_typedef(fname, name, args) - return self.data - - if dtype in ["struct", "union"]: - self.out_struct(fname, name, args) - return self.data - - # Warn if some type requires an output logic - self.config.log.warning("doesn't now how to output '%s' block", - dtype) - - return None - - # Virtual methods to be overridden by inherited classes - # At the base class, those do nothing. - def set_symbols(self, symbols): - """Get a list of all symbols from kernel_doc""" - - def out_doc(self, fname, name, args): - """Outputs a DOC block""" - - def out_function(self, fname, name, args): - """Outputs a function""" - - def out_enum(self, fname, name, args): - """Outputs an enum""" - - def out_typedef(self, fname, name, args): - """Outputs a typedef""" - - def out_struct(self, fname, name, args): - """Outputs a struct""" - - -class RestFormat(OutputFormat): - """Consts and functions used by ReST output""" - - highlights = [ - (type_constant, r"``\1``"), - (type_constant2, r"``\1``"), - - # Note: need to escape () to avoid func matching later - (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), - (type_member, r":c:type:`\1\2\3 <\1>`"), - (type_fp_param, r"**\1\\(\\)**"), - (type_fp_param2, r"**\1\\(\\)**"), - (type_func, r"\1()"), - (type_enum, r":c:type:`\1 <\2>`"), - (type_struct, r":c:type:`\1 <\2>`"), - (type_typedef, r":c:type:`\1 <\2>`"), - (type_union, r":c:type:`\1 <\2>`"), - - # in rst this can refer to any type - (type_fallback, r":c:type:`\1`"), - (type_param_ref, r"**\1\2**") - ] - blankline = "\n" - - sphinx_literal = KernRe(r'^[^.].*::$', cache=False) - sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) - - def __init__(self): - """ - Creates class variables. - - Not really mandatory, but it is a good coding style and makes - pylint happy. - """ - - super().__init__() - self.lineprefix = "" - - def print_lineno(self, ln): - """Outputs a line number""" - - if self.enable_lineno and ln is not None: - ln += 1 - self.data += f".. LINENO {ln}\n" - - def output_highlight(self, args): - """ - Outputs a C symbol that may require being converted to ReST using - the self.highlights variable - """ - - input_text = args - output = "" - in_literal = False - litprefix = "" - block = "" - - for line in input_text.strip("\n").split("\n"): - - # If we're in a literal block, see if we should drop out of it. - # Otherwise, pass the line straight through unmunged. - if in_literal: - if line.strip(): # If the line is not blank - # If this is the first non-blank line in a literal block, - # figure out the proper indent. - if not litprefix: - r = KernRe(r'^(\s*)') - if r.match(line): - litprefix = '^' + r.group(1) - else: - litprefix = "" - - output += line + "\n" - elif not KernRe(litprefix).match(line): - in_literal = False - else: - output += line + "\n" - else: - output += line + "\n" - - # Not in a literal block (or just dropped out) - if not in_literal: - block += line + "\n" - if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line): - in_literal = True - litprefix = "" - output += self.highlight_block(block) - block = "" - - # Handle any remaining block - if block: - output += self.highlight_block(block) - - # Print the output with the line prefix - for line in output.strip("\n").split("\n"): - self.data += self.lineprefix + line + "\n" - - def out_section(self, args, out_docblock=False): - """ - Outputs a block section. - - This could use some work; it's used to output the DOC: sections, and - starts by putting out the name of the doc section itself, but that - tends to duplicate a header already in the template file. - """ - for section, text in args.sections.items(): - # Skip sections that are in the nosymbol_table - if section in self.nosymbol: - continue - - if out_docblock: - if not self.out_mode == self.OUTPUT_INCLUDE: - self.data += f".. _{section}:\n\n" - self.data += f'{self.lineprefix}**{section}**\n\n' - else: - self.data += f'{self.lineprefix}**{section}**\n\n' - - self.print_lineno(args.section_start_lines.get(section, 0)) - self.output_highlight(text) - self.data += "\n" - self.data += "\n" - - def out_doc(self, fname, name, args): - if not self.check_doc(name, args): - return - self.out_section(args, out_docblock=True) - - def out_function(self, fname, name, args): - - oldprefix = self.lineprefix - signature = "" - - func_macro = args.get('func_macro', False) - if func_macro: - signature = name - else: - if args.get('functiontype'): - signature = args['functiontype'] + " " - signature += name + " (" - - ln = args.declaration_start_line - count = 0 - for parameter in args.parameterlist: - if count != 0: - signature += ", " - count += 1 - dtype = args.parametertypes.get(parameter, "") - - if function_pointer.search(dtype): - signature += function_pointer.group(1) + parameter + function_pointer.group(3) - else: - signature += dtype - - if not func_macro: - signature += ")" - - self.print_lineno(ln) - if args.get('typedef') or not args.get('functiontype'): - self.data += f".. c:macro:: {name}\n\n" - - if args.get('typedef'): - self.data += " **Typedef**: " - self.lineprefix = "" - self.output_highlight(args.get('purpose', "")) - self.data += "\n\n**Syntax**\n\n" - self.data += f" ``{signature}``\n\n" - else: - self.data += f"``{signature}``\n\n" - else: - self.data += f".. c:function:: {signature}\n\n" - - if not args.get('typedef'): - self.print_lineno(ln) - self.lineprefix = " " - self.output_highlight(args.get('purpose', "")) - self.data += "\n" - - # Put descriptive text into a container (HTML
) to help set - # function prototypes apart - self.lineprefix = " " - - if args.parameterlist: - self.data += ".. container:: kernelindent\n\n" - self.data += f"{self.lineprefix}**Parameters**\n\n" - - for parameter in args.parameterlist: - parameter_name = KernRe(r'\[.*').sub('', parameter) - dtype = args.parametertypes.get(parameter, "") - - if dtype: - self.data += f"{self.lineprefix}``{dtype}``\n" - else: - self.data += f"{self.lineprefix}``{parameter}``\n" - - self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) - - self.lineprefix = " " - if parameter_name in args.parameterdescs and \ - args.parameterdescs[parameter_name] != KernelDoc.undescribed: - - self.output_highlight(args.parameterdescs[parameter_name]) - self.data += "\n" - else: - self.data += f"{self.lineprefix}*undescribed*\n\n" - self.lineprefix = " " - - self.out_section(args) - self.lineprefix = oldprefix - - def out_enum(self, fname, name, args): - - oldprefix = self.lineprefix - ln = args.declaration_start_line - - self.data += f"\n\n.. c:enum:: {name}\n\n" - - self.print_lineno(ln) - self.lineprefix = " " - self.output_highlight(args.get('purpose', '')) - self.data += "\n" - - self.data += ".. container:: kernelindent\n\n" - outer = self.lineprefix + " " - self.lineprefix = outer + " " - self.data += f"{outer}**Constants**\n\n" - - for parameter in args.parameterlist: - self.data += f"{outer}``{parameter}``\n" - - if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed: - self.output_highlight(args.parameterdescs[parameter]) - else: - self.data += f"{self.lineprefix}*undescribed*\n\n" - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - def out_typedef(self, fname, name, args): - - oldprefix = self.lineprefix - ln = args.declaration_start_line - - self.data += f"\n\n.. c:type:: {name}\n\n" - - self.print_lineno(ln) - self.lineprefix = " " - - self.output_highlight(args.get('purpose', '')) - - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - def out_struct(self, fname, name, args): - - purpose = args.get('purpose', "") - declaration = args.get('definition', "") - dtype = args.type - ln = args.declaration_start_line - - self.data += f"\n\n.. c:{dtype}:: {name}\n\n" - - self.print_lineno(ln) - - oldprefix = self.lineprefix - self.lineprefix += " " - - self.output_highlight(purpose) - self.data += "\n" - - self.data += ".. container:: kernelindent\n\n" - self.data += f"{self.lineprefix}**Definition**::\n\n" - - self.lineprefix = self.lineprefix + " " - - declaration = declaration.replace("\t", self.lineprefix) - - self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n" - self.data += f"{declaration}{self.lineprefix}" + "};\n\n" - - self.lineprefix = " " - self.data += f"{self.lineprefix}**Members**\n\n" - for parameter in args.parameterlist: - if not parameter or parameter.startswith("#"): - continue - - parameter_name = parameter.split("[", maxsplit=1)[0] - - if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: - continue - - self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) - - self.data += f"{self.lineprefix}``{parameter}``\n" - - self.lineprefix = " " - self.output_highlight(args.parameterdescs[parameter_name]) - self.lineprefix = " " - - self.data += "\n" - - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - -class ManFormat(OutputFormat): - """Consts and functions used by man pages output""" - - highlights = ( - (type_constant, r"\1"), - (type_constant2, r"\1"), - (type_func, r"\\fB\1\\fP"), - (type_enum, r"\\fI\1\\fP"), - (type_struct, r"\\fI\1\\fP"), - (type_typedef, r"\\fI\1\\fP"), - (type_union, r"\\fI\1\\fP"), - (type_param, r"\\fI\1\\fP"), - (type_param_ref, r"\\fI\1\2\\fP"), - (type_member, r"\\fI\1\2\3\\fP"), - (type_fallback, r"\\fI\1\\fP") - ) - blankline = "" - - date_formats = [ - "%a %b %d %H:%M:%S %Z %Y", - "%a %b %d %H:%M:%S %Y", - "%Y-%m-%d", - "%b %d %Y", - "%B %d %Y", - "%m %d %Y", - ] - - def __init__(self, modulename): - """ - Creates class variables. - - Not really mandatory, but it is a good coding style and makes - pylint happy. - """ - - super().__init__() - self.modulename = modulename - self.symbols = [] - - dt = None - tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") - if tstamp: - for fmt in self.date_formats: - try: - dt = datetime.strptime(tstamp, fmt) - break - except ValueError: - pass - - if not dt: - dt = datetime.now() - - self.man_date = dt.strftime("%B %Y") - - def arg_name(self, args, name): - """ - Return the name that will be used for the man page. - - As we may have the same name on different namespaces, - prepend the data type for all types except functions and typedefs. - - The doc section is special: it uses the modulename. - """ - - dtype = args.type - - if dtype == "doc": - return self.modulename - - if dtype in ["function", "typedef"]: - return name - - return f"{dtype} {name}" - - def set_symbols(self, symbols): - """ - Get a list of all symbols from kernel_doc. - - Man pages will uses it to add a SEE ALSO section with other - symbols at the same file. - """ - self.symbols = symbols - - def out_tail(self, fname, name, args): - """Adds a tail for all man pages""" - - # SEE ALSO section - self.data += f'.SH "SEE ALSO"' + "\n.PP\n" - self.data += (f"Kernel file \\fB{args.fname}\\fR\n") - if len(self.symbols) >= 2: - cur_name = self.arg_name(args, name) - - related = [] - for arg in self.symbols: - out_name = self.arg_name(arg, arg.name) - - if cur_name == out_name: - continue - - related.append(f"\\fB{out_name}\\fR(9)") - - self.data += ",\n".join(related) + "\n" - - # TODO: does it make sense to add other sections? Maybe - # REPORTING ISSUES? LICENSE? - - def msg(self, fname, name, args): - """ - Handles a single entry from kernel-doc parser. - - Add a tail at the end of man pages output. - """ - super().msg(fname, name, args) - self.out_tail(fname, name, args) - - return self.data - - def output_highlight(self, block): - """ - Outputs a C symbol that may require being highlighted with - self.highlights variable using troff syntax - """ - - contents = self.highlight_block(block) - - if isinstance(contents, list): - contents = "\n".join(contents) - - for line in contents.strip("\n").split("\n"): - line = KernRe(r"^\s*").sub("", line) - if not line: - continue - - if line[0] == ".": - self.data += "\\&" + line + "\n" - else: - self.data += line + "\n" - - def out_doc(self, fname, name, args): - if not self.check_doc(name, args): - return - - out_name = self.arg_name(args, name) - - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_function(self, fname, name, args): - """output function in man""" - - out_name = self.arg_name(args, name) - - self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"{name} \\- {args['purpose']}\n" - - self.data += ".SH SYNOPSIS\n" - if args.get('functiontype', ''): - self.data += f'.B "{args["functiontype"]}" {name}' + "\n" - else: - self.data += f'.B "{name}' + "\n" - - count = 0 - parenth = "(" - post = "," - - for parameter in args.parameterlist: - if count == len(args.parameterlist) - 1: - post = ");" - - dtype = args.parametertypes.get(parameter, "") - if function_pointer.match(dtype): - # Pointer-to-function - self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" - else: - dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) - - self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" - count += 1 - parenth = "" - - if args.parameterlist: - self.data += ".SH ARGUMENTS\n" - - for parameter in args.parameterlist: - parameter_name = re.sub(r'\[.*', '', parameter) - - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name, "")) - - for section, text in args.sections.items(): - self.data += f'.SH "{section.upper()}"' + "\n" - self.output_highlight(text) - - def out_enum(self, fname, name, args): - out_name = self.arg_name(args, name) - - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"enum {name} \\- {args['purpose']}\n" - - self.data += ".SH SYNOPSIS\n" - self.data += f"enum {name}" + " {\n" - - count = 0 - for parameter in args.parameterlist: - self.data += f'.br\n.BI " {parameter}"' + "\n" - if count == len(args.parameterlist) - 1: - self.data += "\n};\n" - else: - self.data += ", \n.br\n" - - count += 1 - - self.data += ".SH Constants\n" - - for parameter in args.parameterlist: - parameter_name = KernRe(r'\[.*').sub('', parameter) - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name, "")) - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_typedef(self, fname, name, args): - module = self.modulename - purpose = args.get('purpose') - out_name = self.arg_name(args, name) - - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"typedef {name} \\- {purpose}\n" - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_struct(self, fname, name, args): - module = self.modulename - purpose = args.get('purpose') - definition = args.get('definition') - out_name = self.arg_name(args, name) - - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"{args.type} {name} \\- {purpose}\n" - - # Replace tabs with two spaces and handle newlines - declaration = definition.replace("\t", " ") - declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) - - self.data += ".SH SYNOPSIS\n" - self.data += f"{args.type} {name} " + "{" + "\n.br\n" - self.data += f'.BI "{declaration}\n' + "};\n.br\n\n" - - self.data += ".SH Members\n" - for parameter in args.parameterlist: - if parameter.startswith("#"): - continue - - parameter_name = re.sub(r"\[.*", "", parameter) - - if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: - continue - - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name)) - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py deleted file mode 100644 index f7dbb0868367..000000000000 --- a/scripts/lib/kdoc/kdoc_parser.py +++ /dev/null @@ -1,1667 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 - -""" -kdoc_parser -=========== - -Read a C language source or header FILE and extract embedded -documentation comments -""" - -import sys -import re -from pprint import pformat - -from kdoc_re import NestedMatch, KernRe -from kdoc_item import KdocItem - -# -# Regular expressions used to parse kernel-doc markups at KernelDoc class. -# -# Let's declare them in lowercase outside any class to make easier to -# convert from the python script. -# -# As those are evaluated at the beginning, no need to cache them -# - -# Allow whitespace at end of comment start. -doc_start = KernRe(r'^/\*\*\s*$', cache=False) - -doc_end = KernRe(r'\*/', cache=False) -doc_com = KernRe(r'\s*\*\s*', cache=False) -doc_com_body = KernRe(r'\s*\* ?', cache=False) -doc_decl = doc_com + KernRe(r'(\w+)', cache=False) - -# @params and a strictly limited set of supported section names -# Specifically: -# Match @word: -# @...: -# @{section-name}: -# while trying to not match literal block starts like "example::" -# -known_section_names = 'description|context|returns?|notes?|examples?' -known_sections = KernRe(known_section_names, flags = re.I) -doc_sect = doc_com + \ - KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', - flags=re.I, cache=False) - -doc_content = doc_com_body + KernRe(r'(.*)', cache=False) -doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) -doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) -doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) -doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) - -export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) -export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) - -type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) - -# -# Tests for the beginning of a kerneldoc block in its various forms. -# -doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) -doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) -doc_begin_func = KernRe(str(doc_com) + # initial " * ' - r"(?:\w+\s*\*\s*)?" + # type (not captured) - r'(?:define\s+)?' + # possible "define" (not captured) - r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" - r'(?:[-:].*)?$', # description (not captured) - cache = False) - -# -# Here begins a long set of transformations to turn structure member prefixes -# and macro invocations into something we can parse and generate kdoc for. -# -struct_args_pattern = r'([^,)]+)' - -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. - # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), -] -# -# Regexes here are guaranteed to have the end limiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. -# -struct_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), -] - -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), -] - -# -# Apply a set of transforms to a block of text. -# -def apply_transforms(xforms, text): - for search, subst in xforms: - text = search.sub(subst, text) - return text - -# -# A little helper to get rid of excess white space -# -multi_space = KernRe(r'\s\s+') -def trim_whitespace(s): - return multi_space.sub(' ', s.strip()) - -# -# Remove struct/enum members that have been marked "private". -# -def trim_private_members(text): - # - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. - # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. - # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() - -class state: - """ - State machine enums - """ - - # Parser states - NORMAL = 0 # normal code - NAME = 1 # looking for function name - DECLARATION = 2 # We have seen a declaration which might not be done - BODY = 3 # the body of the comment - SPECIAL_SECTION = 4 # doc section ending with a blank line - PROTO = 5 # scanning prototype - DOCBLOCK = 6 # documentation block - INLINE_NAME = 7 # gathering doc outside main block - INLINE_TEXT = 8 # reading the body of inline docs - - name = [ - "NORMAL", - "NAME", - "DECLARATION", - "BODY", - "SPECIAL_SECTION", - "PROTO", - "DOCBLOCK", - "INLINE_NAME", - "INLINE_TEXT", - ] - - -SECTION_DEFAULT = "Description" # default section - -class KernelEntry: - - def __init__(self, config, fname, ln): - self.config = config - self.fname = fname - - self._contents = [] - self.prototype = "" - - self.warnings = [] - - self.parameterlist = [] - self.parameterdescs = {} - self.parametertypes = {} - self.parameterdesc_start_lines = {} - - self.section_start_lines = {} - self.sections = {} - - self.anon_struct_union = False - - self.leading_space = None - - self.fname = fname - - # State flags - self.brcount = 0 - self.declaration_start_line = ln + 1 - - # - # Management of section contents - # - def add_text(self, text): - self._contents.append(text) - - def contents(self): - return '\n'.join(self._contents) + '\n' - - # TODO: rename to emit_message after removal of kernel-doc.pl - def emit_msg(self, ln, msg, *, warning=True): - """Emit a message""" - - log_msg = f"{self.fname}:{ln} {msg}" - - if not warning: - self.config.log.info(log_msg) - return - - # Delegate warning output to output logic, as this way it - # will report warnings/info only for symbols that are output - - self.warnings.append(log_msg) - return - - # - # Begin a new section. - # - def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): - if dump: - self.dump_section(start_new = True) - self.section = title - self.new_start_line = line_no - - def dump_section(self, start_new=True): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - # - # If we have accumulated no contents in the default ("description") - # section, don't bother. - # - if self.section == SECTION_DEFAULT and not self._contents: - return - name = self.section - contents = self.contents() - - if type_param.match(name): - name = type_param.group(1) - - self.parameterdescs[name] = contents - self.parameterdesc_start_lines[name] = self.new_start_line - - self.new_start_line = 0 - - else: - if name in self.sections and self.sections[name] != "": - # Only warn on user-specified duplicate section names - if name != SECTION_DEFAULT: - self.emit_msg(self.new_start_line, - f"duplicate section name '{name}'") - # Treat as a new paragraph - add a blank line - self.sections[name] += '\n' + contents - else: - self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line - self.new_start_line = 0 - -# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) - - if start_new: - self.section = SECTION_DEFAULT - self._contents = [] - -python_warning = False - -class KernelDoc: - """ - Read a C language source or header FILE and extract embedded - documentation comments. - """ - - # Section names - - section_context = "Context" - section_return = "Return" - - undescribed = "-- undescribed --" - - def __init__(self, config, fname): - """Initialize internal variables""" - - self.fname = fname - self.config = config - - # Initial state for the state machines - self.state = state.NORMAL - - # Store entry currently being processed - self.entry = None - - # Place all potential outputs into an array - self.entries = [] - - # - # We need Python 3.7 for its "dicts remember the insertion - # order" guarantee - # - global python_warning - if (not python_warning and - sys.version_info.major == 3 and sys.version_info.minor < 7): - - self.emit_msg(0, - 'Python 3.7 or later is required for correct results') - python_warning = True - - def emit_msg(self, ln, msg, *, warning=True): - """Emit a message""" - - if self.entry: - self.entry.emit_msg(ln, msg, warning=warning) - return - - log_msg = f"{self.fname}:{ln} {msg}" - - if warning: - self.config.log.warning(log_msg) - else: - self.config.log.info(log_msg) - - def dump_section(self, start_new=True): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - - if self.entry: - self.entry.dump_section(start_new) - - # TODO: rename it to store_declaration after removal of kernel-doc.pl - def output_declaration(self, dtype, name, **args): - """ - Stores the entry into an entry array. - - The actual output and output filters will be handled elsewhere - """ - - item = KdocItem(name, self.fname, dtype, - self.entry.declaration_start_line, **args) - item.warnings = self.entry.warnings - - # Drop empty sections - # TODO: improve empty sections logic to emit warnings - sections = self.entry.sections - for section in ["Description", "Return"]: - if section in sections and not sections[section].rstrip(): - del sections[section] - item.set_sections(sections, self.entry.section_start_lines) - item.set_params(self.entry.parameterlist, self.entry.parameterdescs, - self.entry.parametertypes, - self.entry.parameterdesc_start_lines) - self.entries.append(item) - - self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) - - def reset_state(self, ln): - """ - Ancillary routine to create a new entry. It initializes all - variables used by the state machine. - """ - - # - # Flush the warnings out before we proceed further - # - if self.entry and self.entry not in self.entries: - for log_msg in self.entry.warnings: - self.config.log.warning(log_msg) - - self.entry = KernelEntry(self.config, self.fname, ln) - - # State flags - self.state = state.NORMAL - - def push_parameter(self, ln, decl_type, param, dtype, - org_arg, declaration_name): - """ - Store parameters and their descriptions at self.entry. - """ - - if self.entry.anon_struct_union and dtype == "" and param == "}": - return # Ignore the ending }; from anonymous struct/union - - self.entry.anon_struct_union = False - - param = KernRe(r'[\[\)].*').sub('', param, count=1) - - # - # Look at various "anonymous type" cases. - # - if dtype == '': - if param.endswith("..."): - if len(param) > 3: # there is a name provided, use that - param = param[:-3] - if not self.entry.parameterdescs.get(param): - self.entry.parameterdescs[param] = "variable arguments" - - elif (not param) or param == "void": - param = "void" - self.entry.parameterdescs[param] = "no arguments" - - elif param in ["struct", "union"]: - # Handle unnamed (anonymous) union or struct - dtype = param - param = "{unnamed_" + param + "}" - self.entry.parameterdescs[param] = "anonymous\n" - self.entry.anon_struct_union = True - - # Warn if parameter has no description - # (but ignore ones starting with # as these are not parameters - # but inline preprocessor statements) - if param not in self.entry.parameterdescs and not param.startswith("#"): - self.entry.parameterdescs[param] = self.undescribed - - if "." not in param: - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - - self.emit_msg(ln, - f"{dname} '{param}' not described in '{declaration_name}'") - - # Strip spaces from param so that it is one continuous string on - # parameterlist. This fixes a problem where check_sections() - # cannot find a parameter like "addr[6 + 2]" because it actually - # appears as "addr[6", "+", "2]" on the parameter list. - # However, it's better to maintain the param string unchanged for - # output, so just weaken the string compare in check_sections() - # to ignore "[blah" in a parameter string. - - self.entry.parameterlist.append(param) - org_arg = KernRe(r'\s\s+').sub(' ', org_arg) - self.entry.parametertypes[param] = org_arg - - - def create_parameter_list(self, ln, decl_type, args, - splitter, declaration_name): - """ - Creates a list of parameters, storing them at self.entry. - """ - - # temporarily replace all commas inside function pointer definition - arg_expr = KernRe(r'(\([^\),]+),') - while arg_expr.search(args): - args = arg_expr.sub(r"\1#", args) - - for arg in args.split(splitter): - # Ignore argument attributes - arg = KernRe(r'\sPOS0?\s').sub(' ', arg) - - # Strip leading/trailing spaces - arg = arg.strip() - arg = KernRe(r'\s+').sub(' ', arg, count=1) - - if arg.startswith('#'): - # Treat preprocessor directive as a typeless variable just to fill - # corresponding data structures "correctly". Catch it later in - # output_* subs. - - # Treat preprocessor directive as a typeless variable - self.push_parameter(ln, decl_type, arg, "", - "", declaration_name) - # - # The pointer-to-function case. - # - elif KernRe(r'\(.+\)\s*\(').search(arg): - arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" - r'([\w\[\].]*)' # Capture the name and possible [array] - r'\s*\)') # Make sure the trailing ")" is there - if r.match(arg): - param = r.group(1) - else: - self.emit_msg(ln, f"Invalid param: {arg}") - param = arg - dtype = arg.replace(param, '') - self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - # - # The array-of-pointers case. Dig the parameter name out from the middle - # of the declaration. - # - elif KernRe(r'\(.+\)\s*\[').search(arg): - r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" - r'([\w.]*?)' # The actual pointer name - r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] - if r.match(arg): - param = r.group(1) - else: - self.emit_msg(ln, f"Invalid param: {arg}") - param = arg - dtype = arg.replace(param, '') - self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - elif arg: - # - # Clean up extraneous spaces and split the string at commas; the first - # element of the resulting list will also include the type information. - # - arg = KernRe(r'\s*:\s*').sub(":", arg) - arg = KernRe(r'\s*\[').sub('[', arg) - args = KernRe(r'\s*,\s*').split(arg) - args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) - # - # args[0] has a string of "type a". If "a" includes an [array] - # declaration, we want to not be fooled by any white space inside - # the brackets, so detect and handle that case specially. - # - r = KernRe(r'^([^[\]]*\s+)(.*)$') - if r.match(args[0]): - args[0] = r.group(2) - dtype = r.group(1) - else: - # No space in args[0]; this seems wrong but preserves previous behavior - dtype = '' - - bitfield_re = KernRe(r'(.*?):(\w+)') - for param in args: - # - # For pointers, shift the star(s) from the variable name to the - # type declaration. - # - r = KernRe(r'^(\*+)\s*(.*)') - if r.match(param): - self.push_parameter(ln, decl_type, r.group(2), - f"{dtype} {r.group(1)}", - arg, declaration_name) - # - # Perform a similar shift for bitfields. - # - elif bitfield_re.search(param): - if dtype != "": # Skip unnamed bit-fields - self.push_parameter(ln, decl_type, bitfield_re.group(1), - f"{dtype}:{bitfield_re.group(2)}", - arg, declaration_name) - else: - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - def check_sections(self, ln, decl_name, decl_type): - """ - Check for errors inside sections, emitting warnings if not found - parameters are described. - """ - for section in self.entry.sections: - if section not in self.entry.parameterlist and \ - not known_sections.search(section): - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - self.emit_msg(ln, - f"Excess {dname} '{section}' description in '{decl_name}'") - - def check_return_section(self, ln, declaration_name, return_type): - """ - If the function doesn't return void, warns about the lack of a - return description. - """ - - if not self.config.wreturn: - return - - # Ignore an empty return type (It's a macro) - # Ignore functions with a "void" return type (but not "void *") - if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): - return - - if not self.entry.sections.get("Return", None): - self.emit_msg(ln, - f"No description found for return value of '{declaration_name}'") - - # - # Split apart a structure prototype; returns (struct|union, name, members) or None - # - def split_struct_proto(self, proto): - type_pattern = r'(struct|union)' - qualifiers = [ - "__attribute__", - "__packed", - "__aligned", - "____cacheline_aligned_in_smp", - "____cacheline_aligned", - ] - definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - - r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) - if r.search(proto): - return (r.group(1), r.group(2), r.group(3)) - else: - r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') - if r.search(proto): - return (r.group(1), r.group(3), r.group(2)) - return None - # - # Rewrite the members of a structure or union for easier formatting later on. - # Among other things, this function will turn a member like: - # - # struct { inner_members; } foo; - # - # into: - # - # struct foo; inner_members; - # - def rewrite_struct_members(self, members): - # - # Process struct/union members from the most deeply nested outward. The - # trick is in the ^{ below - it prevents a match of an outer struct/union - # until the inner one has been munged (removing the "{" in the process). - # - struct_members = KernRe(r'(struct|union)' # 0: declaration type - r'([^\{\};]+)' # 1: possible name - r'(\{)' - r'([^\{\}]*)' # 3: Contents of declaration - r'(\})' - r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration - tuples = struct_members.findall(members) - while tuples: - for t in tuples: - newmember = "" - oldmember = "".join(t) # Reconstruct the original formatting - dtype, name, lbr, content, rbr, rest, semi = t - # - # Pass through each field name, normalizing the form and formatting. - # - for s_id in rest.split(','): - s_id = s_id.strip() - newmember += f"{dtype} {s_id}; " - # - # Remove bitfield/array/pointer info, getting the bare name. - # - s_id = KernRe(r'[:\[].*').sub('', s_id) - s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - # - # Pass through the members of this inner structure/union. - # - for arg in content.split(';'): - arg = arg.strip() - # - # Look for (type)(*name)(args) - pointer to function - # - r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') - if r.match(arg): - dtype, name, extra = r.group(1), r.group(2), r.group(3) - # Pointer-to-function - if not s_id: - # Anonymous struct/union - newmember += f"{dtype}{name}{extra}; " - else: - newmember += f"{dtype}{s_id}.{name}{extra}; " - # - # Otherwise a non-function member. - # - else: - # - # Remove bitmap and array portions and spaces around commas - # - arg = KernRe(r':\s*\d+\s*').sub('', arg) - arg = KernRe(r'\[.*\]').sub('', arg) - arg = KernRe(r'\s*,\s*').sub(',', arg) - # - # Look for a normal decl - "type name[,name...]" - # - r = KernRe(r'(.*)\s+([\S+,]+)') - if r.search(arg): - for name in r.group(2).split(','): - name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) - if not s_id: - # Anonymous struct/union - newmember += f"{r.group(1)} {name}; " - else: - newmember += f"{r.group(1)} {s_id}.{name}; " - else: - newmember += f"{arg}; " - # - # At the end of the s_id loop, replace the original declaration with - # the munged version. - # - members = members.replace(oldmember, newmember) - # - # End of the tuple loop - search again and see if there are outer members - # that now turn up. - # - tuples = struct_members.findall(members) - return members - - # - # Format the struct declaration into a standard form for inclusion in the - # resulting docs. - # - def format_struct_decl(self, declaration): - # - # Insert newlines, get rid of extra spaces. - # - declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) - declaration = KernRe(r'\}\s+;').sub('};', declaration) - # - # Format inline enums with each member on its own line. - # - r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') - while r.search(declaration): - declaration = r.sub(r'\1,\n\2', declaration) - # - # Now go through and supply the right number of tabs - # for each line. - # - def_args = declaration.split('\n') - level = 1 - declaration = "" - for clause in def_args: - clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) - if clause: - if '}' in clause and level > 1: - level -= 1 - if not clause.startswith('#'): - declaration += "\t" * level - declaration += "\t" + clause + "\n" - if "{" in clause and "}" not in clause: - level += 1 - return declaration - - - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - # - # Do the basic parse to get the pieces of the declaration. - # - struct_parts = self.split_struct_proto(proto) - if not struct_parts: - self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") - return - decl_type, declaration_name, members = struct_parts - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " - f"Prototype was for {decl_type} {declaration_name} instead\n") - return - # - # Go through the list of members applying all of our transformations. - # - members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) - - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) - # - # Deal with embedded struct and union members, and drop enums entirely. - # - declaration = members - members = self.rewrite_struct_members(members) - members = re.sub(r'(\{[^\{\}]*\})', '', members) - # - # Output the result and we are done. - # - self.create_parameter_list(ln, decl_type, members, ';', - declaration_name) - self.check_sections(ln, declaration_name, decl_type) - self.output_declaration(decl_type, declaration_name, - definition=self.format_struct_decl(declaration), - purpose=self.entry.declaration_purpose) - - def dump_enum(self, ln, proto): - """ - Stores an enum inside self.entries array. - """ - # - # Strip preprocessor directives. Note that this depends on the - # trailing semicolon we added in process_proto_type(). - # - proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) - # - # Parse out the name and members of the enum. Typedef form first. - # - r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') - if r.search(proto): - declaration_name = r.group(2) - members = trim_private_members(r.group(1)) - # - # Failing that, look for a straight enum - # - else: - r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') - if r.match(proto): - declaration_name = r.group(1) - members = trim_private_members(r.group(2)) - # - # OK, this isn't going to work. - # - else: - self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") - return - # - # Make sure we found what we were expecting. - # - if self.entry.identifier != declaration_name: - if self.entry.identifier == "": - self.emit_msg(ln, - f"{proto}: wrong kernel-doc identifier on prototype") - else: - self.emit_msg(ln, - f"expecting prototype for enum {self.entry.identifier}. " - f"Prototype was for enum {declaration_name} instead") - return - - if not declaration_name: - declaration_name = "(anonymous)" - # - # Parse out the name of each enum member, and verify that we - # have a description for it. - # - member_set = set() - members = KernRe(r'\([^;)]*\)').sub('', members) - for arg in members.split(','): - if not arg: - continue - arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) - self.entry.parameterlist.append(arg) - if arg not in self.entry.parameterdescs: - self.entry.parameterdescs[arg] = self.undescribed - self.emit_msg(ln, - f"Enum value '{arg}' not described in enum '{declaration_name}'") - member_set.add(arg) - # - # Ensure that every described member actually exists in the enum. - # - for k in self.entry.parameterdescs: - if k not in member_set: - self.emit_msg(ln, - f"Excess enum value '%{k}' description in '{declaration_name}'") - - self.output_declaration('enum', declaration_name, - purpose=self.entry.declaration_purpose) - - def dump_declaration(self, ln, prototype): - """ - Stores a data declaration inside self.entries array. - """ - - if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) - elif self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) - elif self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) - else: - # This would be a bug - self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') - - def dump_function(self, ln, prototype): - """ - Stores a function of function macro inside self.entries array. - """ - - found = func_macro = False - return_type = '' - decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) - # - # If we have a macro, remove the "#define" at the front. - # - new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) - if new_proto != prototype: - prototype = new_proto - # - # Dispense with the simple "#define A B" case here; the key - # is the space after the name of the symbol being defined. - # NOTE that the seemingly misnamed "func_macro" indicates a - # macro *without* arguments. - # - r = KernRe(r'^(\w+)\s+') - if r.search(prototype): - return_type = '' - declaration_name = r.group(1) - func_macro = True - found = True - - # Yes, this truly is vile. We are looking for: - # 1. Return type (may be nothing if we're looking at a macro) - # 2. Function name - # 3. Function parameters. - # - # All the while we have to watch out for function pointer parameters - # (which IIRC is what the two sections are for), C types (these - # regexps don't even start to express all the possibilities), and - # so on. - # - # If you mess with these regexps, it's a good idea to check that - # the following functions' documentation still comes out right: - # - parport_register_device (function pointer parameters) - # - atomic_set (macro) - # - pci_match_device, __copy_to_user (long return type) - - name = r'\w+' - type1 = r'(?:[\w\s]+)?' - type2 = r'(?:[\w\s]+\*+)+' - # - # Attempt to match first on (args) with no internal parentheses; this - # lets us easily filter out __acquires() and other post-args stuff. If - # that fails, just grab the rest of the line to the last closing - # parenthesis. - # - proto_args = r'\(([^\(]*|.*)\)' - # - # (Except for the simple macro case) attempt to split up the prototype - # in the various ways we understand. - # - if not found: - patterns = [ - rf'^()({name})\s*{proto_args}', - rf'^({type1})\s+({name})\s*{proto_args}', - rf'^({type2})\s*({name})\s*{proto_args}', - ] - - for p in patterns: - r = KernRe(p) - if r.match(prototype): - return_type = r.group(1) - declaration_name = r.group(2) - args = r.group(3) - self.create_parameter_list(ln, decl_type, args, ',', - declaration_name) - found = True - break - # - # Parsing done; make sure that things are as we expect. - # - if not found: - self.emit_msg(ln, - f"cannot understand function prototype: '{prototype}'") - return - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " - f"Prototype was for {declaration_name}() instead") - return - self.check_sections(ln, declaration_name, "function") - self.check_return_section(ln, declaration_name, return_type) - # - # Store the result. - # - self.output_declaration(decl_type, declaration_name, - typedef=('typedef' in return_type), - functiontype=return_type, - purpose=self.entry.declaration_purpose, - func_macro=func_macro) - - - def dump_typedef(self, ln, proto): - """ - Stores a typedef inside self.entries array. - """ - # - # We start by looking for function typedefs. - # - typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' - typedef_ident = r'\*?\s*(\w\S+)\s*' - typedef_args = r'\s*\((.*)\);' - - typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) - typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) - - # Parse function typedef prototypes - for r in [typedef1, typedef2]: - if not r.match(proto): - continue - - return_type = r.group(1).strip() - declaration_name = r.group(2) - args = r.group(3) - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") - return - - self.create_parameter_list(ln, 'function', args, ',', declaration_name) - - self.output_declaration('function', declaration_name, - typedef=True, - functiontype=return_type, - purpose=self.entry.declaration_purpose) - return - # - # Not a function, try to parse a simple typedef. - # - r = KernRe(r'typedef.*\s+(\w+)\s*;') - if r.match(proto): - declaration_name = r.group(1) - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") - return - - self.output_declaration('typedef', declaration_name, - purpose=self.entry.declaration_purpose) - return - - self.emit_msg(ln, "error: Cannot parse typedef!") - - @staticmethod - def process_export(function_set, line): - """ - process EXPORT_SYMBOL* tags - - This method doesn't use any variable from the class, so declare it - with a staticmethod decorator. - """ - - # We support documenting some exported symbols with different - # names. A horrible hack. - suffixes = [ '_noprof' ] - - # Note: it accepts only one EXPORT_SYMBOL* per line, as having - # multiple export lines would violate Kernel coding style. - - if export_symbol.search(line): - symbol = export_symbol.group(2) - elif export_symbol_ns.search(line): - symbol = export_symbol_ns.group(2) - else: - return False - # - # Found an export, trim out any special suffixes - # - for suffix in suffixes: - # Be backward compatible with Python < 3.9 - if symbol.endswith(suffix): - symbol = symbol[:-len(suffix)] - function_set.add(symbol) - return True - - def process_normal(self, ln, line): - """ - STATE_NORMAL: looking for the /** to begin everything. - """ - - if not doc_start.match(line): - return - - # start a new entry - self.reset_state(ln) - - # next line is always the function name - self.state = state.NAME - - def process_name(self, ln, line): - """ - STATE_NAME: Looking for the "name - description" line - """ - # - # Check for a DOC: block and handle them specially. - # - if doc_block.search(line): - - if not doc_block.group(1): - self.entry.begin_section(ln, "Introduction") - else: - self.entry.begin_section(ln, doc_block.group(1)) - - self.entry.identifier = self.entry.section - self.state = state.DOCBLOCK - # - # Otherwise we're looking for a normal kerneldoc declaration line. - # - elif doc_decl.search(line): - self.entry.identifier = doc_decl.group(1) - - # Test for data declaration - if doc_begin_data.search(line): - self.entry.decl_type = doc_begin_data.group(1) - self.entry.identifier = doc_begin_data.group(2) - # - # Look for a function description - # - elif doc_begin_func.search(line): - self.entry.identifier = doc_begin_func.group(1) - self.entry.decl_type = "function" - # - # We struck out. - # - else: - self.emit_msg(ln, - f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") - self.state = state.NORMAL - return - # - # OK, set up for a new kerneldoc entry. - # - self.state = state.BODY - self.entry.identifier = self.entry.identifier.strip(" ") - # if there's no @param blocks need to set up default section here - self.entry.begin_section(ln + 1) - # - # Find the description portion, which *should* be there but - # isn't always. - # (We should be able to capture this from the previous parsing - someday) - # - r = KernRe("[-:](.*)") - if r.search(line): - self.entry.declaration_purpose = trim_whitespace(r.group(1)) - self.state = state.DECLARATION - else: - self.entry.declaration_purpose = "" - - if not self.entry.declaration_purpose and self.config.wshort_desc: - self.emit_msg(ln, - f"missing initial short description on line:\n{line}") - - if not self.entry.identifier and self.entry.decl_type != "enum": - self.emit_msg(ln, - f"wrong kernel-doc identifier on line:\n{line}") - self.state = state.NORMAL - - if self.config.verbose: - self.emit_msg(ln, - f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", - warning=False) - # - # Failed to find an identifier. Emit a warning - # - else: - self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") - - # - # Helper function to determine if a new section is being started. - # - def is_new_section(self, ln, line): - if doc_sect.search(line): - self.state = state.BODY - # - # Pick out the name of our new section, tweaking it if need be. - # - newsection = doc_sect.group(1) - if newsection.lower() == 'description': - newsection = 'Description' - elif newsection.lower() == 'context': - newsection = 'Context' - self.state = state.SPECIAL_SECTION - elif newsection.lower() in ["@return", "@returns", - "return", "returns"]: - newsection = "Return" - self.state = state.SPECIAL_SECTION - elif newsection[0] == '@': - self.state = state.SPECIAL_SECTION - # - # Initialize the contents, and get the new section going. - # - newcontents = doc_sect.group(2) - if not newcontents: - newcontents = "" - self.dump_section() - self.entry.begin_section(ln, newsection) - self.entry.leading_space = None - - self.entry.add_text(newcontents.lstrip()) - return True - return False - - # - # Helper function to detect (and effect) the end of a kerneldoc comment. - # - def is_comment_end(self, ln, line): - if doc_end.search(line): - self.dump_section() - - # Look for doc_com + + doc_end: - r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') - if r.match(line): - self.emit_msg(ln, f"suspicious ending line: {line}") - - self.entry.prototype = "" - self.entry.new_start_line = ln + 1 - - self.state = state.PROTO - return True - return False - - - def process_decl(self, ln, line): - """ - STATE_DECLARATION: We've seen the beginning of a declaration - """ - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - # - # Look for anything with the " * " line beginning. - # - if doc_content.search(line): - cont = doc_content.group(1) - # - # A blank line means that we have moved out of the declaration - # part of the comment (without any "special section" parameter - # descriptions). - # - if cont == "": - self.state = state.BODY - # - # Otherwise we have more of the declaration section to soak up. - # - else: - self.entry.declaration_purpose = \ - trim_whitespace(self.entry.declaration_purpose + ' ' + cont) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - - def process_special(self, ln, line): - """ - STATE_SPECIAL_SECTION: a section ending with a blank line - """ - # - # If we have hit a blank line (only the " * " marker), then this - # section is done. - # - if KernRe(r"\s*\*\s*$").match(line): - self.entry.begin_section(ln, dump = True) - self.state = state.BODY - return - # - # Not a blank line, look for the other ways to end the section. - # - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - # - # OK, we should have a continuation of the text for this section. - # - if doc_content.search(line): - cont = doc_content.group(1) - # - # If the lines of text after the first in a special section have - # leading white space, we need to trim it out or Sphinx will get - # confused. For the second line (the None case), see what we - # find there and remember it. - # - if self.entry.leading_space is None: - r = KernRe(r'^(\s+)') - if r.match(cont): - self.entry.leading_space = len(r.group(1)) - else: - self.entry.leading_space = 0 - # - # Otherwise, before trimming any leading chars, be *sure* - # that they are white space. We should maybe warn if this - # isn't the case. - # - for i in range(0, self.entry.leading_space): - if cont[i] != " ": - self.entry.leading_space = i - break - # - # Add the trimmed result to the section and we're done. - # - self.entry.add_text(cont[self.entry.leading_space:]) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - def process_body(self, ln, line): - """ - STATE_BODY: the bulk of a kerneldoc comment. - """ - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - - if doc_content.search(line): - cont = doc_content.group(1) - self.entry.add_text(cont) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - def process_inline_name(self, ln, line): - """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" - - if doc_inline_sect.search(line): - self.entry.begin_section(ln, doc_inline_sect.group(1)) - self.entry.add_text(doc_inline_sect.group(2).lstrip()) - self.state = state.INLINE_TEXT - elif doc_inline_end.search(line): - self.dump_section() - self.state = state.PROTO - elif doc_content.search(line): - self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") - self.state = state.PROTO - # else ... ?? - - def process_inline_text(self, ln, line): - """STATE_INLINE_TEXT: docbook comments within a prototype.""" - - if doc_inline_end.search(line): - self.dump_section() - self.state = state.PROTO - elif doc_content.search(line): - self.entry.add_text(doc_content.group(1)) - # else ... ?? - - def syscall_munge(self, ln, proto): # pylint: disable=W0613 - """ - Handle syscall definitions - """ - - is_void = False - - # Strip newlines/CR's - proto = re.sub(r'[\r\n]+', ' ', proto) - - # Check if it's a SYSCALL_DEFINE0 - if 'SYSCALL_DEFINE0' in proto: - is_void = True - - # Replace SYSCALL_DEFINE with correct return type & function name - proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) - - r = KernRe(r'long\s+(sys_.*?),') - if r.search(proto): - proto = KernRe(',').sub('(', proto, count=1) - elif is_void: - proto = KernRe(r'\)').sub('(void)', proto, count=1) - - # Now delete all of the odd-numbered commas in the proto - # so that argument types & names don't have a comma between them - count = 0 - length = len(proto) - - if is_void: - length = 0 # skip the loop if is_void - - for ix in range(length): - if proto[ix] == ',': - count += 1 - if count % 2 == 1: - proto = proto[:ix] + ' ' + proto[ix + 1:] - - return proto - - def tracepoint_munge(self, ln, proto): - """ - Handle tracepoint definitions - """ - - tracepointname = None - tracepointargs = None - - # Match tracepoint name based on different patterns - r = KernRe(r'TRACE_EVENT\((.*?),') - if r.search(proto): - tracepointname = r.group(1) - - r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') - if r.search(proto): - tracepointname = r.group(1) - - r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') - if r.search(proto): - tracepointname = r.group(2) - - if tracepointname: - tracepointname = tracepointname.lstrip() - - r = KernRe(r'TP_PROTO\((.*?)\)') - if r.search(proto): - tracepointargs = r.group(1) - - if not tracepointname or not tracepointargs: - self.emit_msg(ln, - f"Unrecognized tracepoint format:\n{proto}\n") - else: - proto = f"static inline void trace_{tracepointname}({tracepointargs})" - self.entry.identifier = f"trace_{self.entry.identifier}" - - return proto - - def process_proto_function(self, ln, line): - """Ancillary routine to process a function prototype""" - - # strip C99-style comments to end of line - line = KernRe(r"//.*$", re.S).sub('', line) - # - # Soak up the line's worth of prototype text, stopping at { or ; if present. - # - if KernRe(r'\s*#\s*define').match(line): - self.entry.prototype = line - elif not line.startswith('#'): # skip other preprocessor stuff - r = KernRe(r'([^\{]*)') - if r.match(line): - self.entry.prototype += r.group(1) + " " - # - # If we now have the whole prototype, clean it up and declare victory. - # - if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): - # strip comments and surrounding spaces - self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() - # - # Handle self.entry.prototypes for function pointers like: - # int (*pcs_config)(struct foo) - # by turning it into - # int pcs_config(struct foo) - # - r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') - self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) - # - # Handle special declaration syntaxes - # - if 'SYSCALL_DEFINE' in self.entry.prototype: - self.entry.prototype = self.syscall_munge(ln, - self.entry.prototype) - else: - r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') - if r.search(self.entry.prototype): - self.entry.prototype = self.tracepoint_munge(ln, - self.entry.prototype) - # - # ... and we're done - # - self.dump_function(ln, self.entry.prototype) - self.reset_state(ln) - - def process_proto_type(self, ln, line): - """Ancillary routine to process a type""" - - # Strip C99-style comments and surrounding whitespace - line = KernRe(r"//.*$", re.S).sub('', line).strip() - if not line: - return # nothing to see here - - # To distinguish preprocessor directive from regular declaration later. - if line.startswith('#'): - line += ";" - # - # Split the declaration on any of { } or ;, and accumulate pieces - # until we hit a semicolon while not inside {brackets} - # - r = KernRe(r'(.*?)([{};])') - for chunk in r.split(line): - if chunk: # Ignore empty matches - self.entry.prototype += chunk - # - # This cries out for a match statement ... someday after we can - # drop Python 3.9 ... - # - if chunk == '{': - self.entry.brcount += 1 - elif chunk == '}': - self.entry.brcount -= 1 - elif chunk == ';' and self.entry.brcount <= 0: - self.dump_declaration(ln, self.entry.prototype) - self.reset_state(ln) - return - # - # We hit the end of the line while still in the declaration; put - # in a space to represent the newline. - # - self.entry.prototype += ' ' - - def process_proto(self, ln, line): - """STATE_PROTO: reading a function/whatever prototype.""" - - if doc_inline_oneline.search(line): - self.entry.begin_section(ln, doc_inline_oneline.group(1)) - self.entry.add_text(doc_inline_oneline.group(2)) - self.dump_section() - - elif doc_inline_start.search(line): - self.state = state.INLINE_NAME - - elif self.entry.decl_type == 'function': - self.process_proto_function(ln, line) - - else: - self.process_proto_type(ln, line) - - def process_docblock(self, ln, line): - """STATE_DOCBLOCK: within a DOC: block.""" - - if doc_end.search(line): - self.dump_section() - self.output_declaration("doc", self.entry.identifier) - self.reset_state(ln) - - elif doc_content.search(line): - self.entry.add_text(doc_content.group(1)) - - def parse_export(self): - """ - Parses EXPORT_SYMBOL* macros from a single Kernel source file. - """ - - export_table = set() - - try: - with open(self.fname, "r", encoding="utf8", - errors="backslashreplace") as fp: - - for line in fp: - self.process_export(export_table, line) - - except IOError: - return None - - return export_table - - # - # The state/action table telling us which function to invoke in - # each state. - # - state_actions = { - state.NORMAL: process_normal, - state.NAME: process_name, - state.BODY: process_body, - state.DECLARATION: process_decl, - state.SPECIAL_SECTION: process_special, - state.INLINE_NAME: process_inline_name, - state.INLINE_TEXT: process_inline_text, - state.PROTO: process_proto, - state.DOCBLOCK: process_docblock, - } - - def parse_kdoc(self): - """ - Open and process each line of a C source file. - The parsing is controlled via a state machine, and the line is passed - to a different process function depending on the state. The process - function may update the state as needed. - - Besides parsing kernel-doc tags, it also parses export symbols. - """ - - prev = "" - prev_ln = None - export_table = set() - - try: - with open(self.fname, "r", encoding="utf8", - errors="backslashreplace") as fp: - for ln, line in enumerate(fp): - - line = line.expandtabs().strip("\n") - - # Group continuation lines on prototypes - if self.state == state.PROTO: - if line.endswith("\\"): - prev += line.rstrip("\\") - if not prev_ln: - prev_ln = ln - continue - - if prev: - ln = prev_ln - line = prev + line - prev = "" - prev_ln = None - - self.config.log.debug("%d %s: %s", - ln, state.name[self.state], - line) - - # This is an optimization over the original script. - # There, when export_file was used for the same file, - # it was read twice. Here, we use the already-existing - # loop to parse exported symbols as well. - # - if (self.state != state.NORMAL) or \ - not self.process_export(export_table, line): - # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) - - except OSError: - self.config.log.error(f"Error: Cannot open file {self.fname}") - - return export_table, self.entries diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py deleted file mode 100644 index 612223e1e723..000000000000 --- a/scripts/lib/kdoc/kdoc_re.py +++ /dev/null @@ -1,270 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . - -""" -Regular expression ancillary classes. - -Those help caching regular expressions and do matching for kernel-doc. -""" - -import re - -# Local cache for regular expressions -re_cache = {} - - -class KernRe: - """ - Helper class to simplify regex declaration and usage, - - It calls re.compile for a given pattern. It also allows adding - regular expressions and define sub at class init time. - - Regular expressions can be cached via an argument, helping to speedup - searches. - """ - - def _add_regex(self, string, flags): - """ - Adds a new regex or re-use it from the cache. - """ - self.regex = re_cache.get(string, None) - if not self.regex: - self.regex = re.compile(string, flags=flags) - if self.cache: - re_cache[string] = self.regex - - def __init__(self, string, cache=True, flags=0): - """ - Compile a regular expression and initialize internal vars. - """ - - self.cache = cache - self.last_match = None - - self._add_regex(string, flags) - - def __str__(self): - """ - Return the regular expression pattern. - """ - return self.regex.pattern - - def __add__(self, other): - """ - Allows adding two regular expressions into one. - """ - - return KernRe(str(self) + str(other), cache=self.cache or other.cache, - flags=self.regex.flags | other.regex.flags) - - def match(self, string): - """ - Handles a re.match storing its results - """ - - self.last_match = self.regex.match(string) - return self.last_match - - def search(self, string): - """ - Handles a re.search storing its results - """ - - self.last_match = self.regex.search(string) - return self.last_match - - def findall(self, string): - """ - Alias to re.findall - """ - - return self.regex.findall(string) - - def split(self, string): - """ - Alias to re.split - """ - - return self.regex.split(string) - - def sub(self, sub, string, count=0): - """ - Alias to re.sub - """ - - return self.regex.sub(sub, string, count=count) - - def group(self, num): - """ - Returns the group results of the last match - """ - - return self.last_match.group(num) - - -class NestedMatch: - """ - Finding nested delimiters is hard with regular expressions. It is - even harder on Python with its normal re module, as there are several - advanced regular expressions that are missing. - - This is the case of this pattern: - - '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' - - which is used to properly match open/close parenthesis of the - string search STRUCT_GROUP(), - - Add a class that counts pairs of delimiters, using it to match and - replace nested expressions. - - The original approach was suggested by: - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - - Although I re-implemented it to make it more generic and match 3 types - of delimiters. The logic checks if delimiters are paired. If not, it - will ignore the search string. - """ - - # TODO: make NestedMatch handle multiple match groups - # - # Right now, regular expressions to match it are defined only up to - # the start delimiter, e.g.: - # - # \bSTRUCT_GROUP\( - # - # is similar to: STRUCT_GROUP\((.*)\) - # except that the content inside the match group is delimiter's aligned. - # - # The content inside parenthesis are converted into a single replace - # group (e.g. r`\1'). - # - # It would be nice to change such definition to support multiple - # match groups, allowing a regex equivalent to. - # - # FOO\((.*), (.*), (.*)\) - # - # it is probably easier to define it not as a regular expression, but - # with some lexical definition like: - # - # FOO(arg1, arg2, arg3) - - DELIMITER_PAIRS = { - '{': '}', - '(': ')', - '[': ']', - } - - RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') - - def _search(self, regex, line): - """ - Finds paired blocks for a regex that ends with a delimiter. - - The suggestion of using finditer to match pairs came from: - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - but I ended using a different implementation to align all three types - of delimiters and seek for an initial regular expression. - - The algorithm seeks for open/close paired delimiters and place them - into a stack, yielding a start/stop position of each match when the - stack is zeroed. - - The algorithm shoud work fine for properly paired lines, but will - silently ignore end delimiters that preceeds an start delimiter. - This should be OK for kernel-doc parser, as unaligned delimiters - would cause compilation errors. So, we don't need to rise exceptions - to cover such issues. - """ - - stack = [] - - for match_re in regex.finditer(line): - start = match_re.start() - offset = match_re.end() - - d = line[offset - 1] - if d not in self.DELIMITER_PAIRS: - continue - - end = self.DELIMITER_PAIRS[d] - stack.append(end) - - for match in self.RE_DELIM.finditer(line[offset:]): - pos = match.start() + offset - - d = line[pos] - - if d in self.DELIMITER_PAIRS: - end = self.DELIMITER_PAIRS[d] - - stack.append(end) - continue - - # Does the end delimiter match what it is expected? - if stack and d == stack[-1]: - stack.pop() - - if not stack: - yield start, offset, pos + 1 - break - - def search(self, regex, line): - """ - This is similar to re.search: - - It matches a regex that it is followed by a delimiter, - returning occurrences only if all delimiters are paired. - """ - - for t in self._search(regex, line): - - yield line[t[0]:t[2]] - - def sub(self, regex, sub, line, count=0): - """ - This is similar to re.sub: - - It matches a regex that it is followed by a delimiter, - replacing occurrences only if all delimiters are paired. - - if r'\1' is used, it works just like re: it places there the - matched paired data with the delimiter stripped. - - If count is different than zero, it will replace at most count - items. - """ - out = "" - - cur_pos = 0 - n = 0 - - for start, end, pos in self._search(regex, line): - out += line[cur_pos:start] - - # Value, ignoring start/end delimiters - value = line[end:pos - 1] - - # replaces \1 at the sub string, if \1 is used there - new_sub = sub - new_sub = new_sub.replace(r'\1', value) - - out += new_sub - - # Drop end ';' if any - if line[pos] == ';': - pos += 1 - - cur_pos = pos - n += 1 - - if count and count >= n: - break - - # Append the remaining string - l = len(line) - out += line[cur_pos:l] - - return out diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py index c0997d6861dc..c48bb05dad82 100755 --- a/tools/docs/check-variable-fonts.py +++ b/tools/docs/check-variable-fonts.py @@ -9,13 +9,17 @@ """ Detect problematic Noto CJK variable fonts. -or more details, see lib/latex_fonts.py. +or more details, see .../tools/lib/python/kdoc/latex_fonts.py. """ import argparse import sys +import os.path -from lib.latex_fonts import LatexFontChecker +src_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) + +from latex_fonts import LatexFontChecker checker = LatexFontChecker() diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py index da69e77559cc..e0abfe12fac7 100755 --- a/tools/docs/get_abi.py +++ b/tools/docs/get_abi.py @@ -14,7 +14,7 @@ import sys # Import Python modules -LIB_DIR = "../../scripts/lib/abi" +LIB_DIR = "../lib/python/abi" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) diff --git a/tools/docs/lib/__init__.py b/tools/docs/lib/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tools/docs/lib/enrich_formatter.py b/tools/docs/lib/enrich_formatter.py deleted file mode 100644 index bb171567a4ca..000000000000 --- a/tools/docs/lib/enrich_formatter.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright (c) 2025 by Mauro Carvalho Chehab . - -""" -Ancillary argparse HelpFormatter class that works on a similar way as -argparse.RawDescriptionHelpFormatter, e.g. description maintains line -breaks, but it also implement transformations to the help text. The -actual transformations ar given by enrich_text(), if the output is tty. - -Currently, the follow transformations are done: - - - Positional arguments are shown in upper cases; - - if output is TTY, ``var`` and positional arguments are shown prepended - by an ANSI SGR code. This is usually translated to bold. On some - terminals, like, konsole, this is translated into a colored bold text. -""" - -import argparse -import re -import sys - -class EnrichFormatter(argparse.HelpFormatter): - """ - Better format the output, making easier to identify the positional args - and how they're used at the __doc__ description. - """ - def __init__(self, *args, **kwargs): - """Initialize class and check if is TTY""" - super().__init__(*args, **kwargs) - self._tty = sys.stdout.isatty() - - def enrich_text(self, text): - """Handle ReST markups (currently, only ``foo``)""" - if self._tty and text: - # Replace ``text`` with ANSI SGR (bold) - return re.sub(r'\`\`(.+?)\`\`', - lambda m: f'\033[1m{m.group(1)}\033[0m', text) - return text - - def _fill_text(self, text, width, indent): - """Enrich descriptions with markups on it""" - enriched = self.enrich_text(text) - return "\n".join(indent + line for line in enriched.splitlines()) - - def _format_usage(self, usage, actions, groups, prefix): - """Enrich positional arguments at usage: line""" - - prog = self._prog - parts = [] - - for action in actions: - if action.option_strings: - opt = action.option_strings[0] - if action.nargs != 0: - opt += f" {action.dest.upper()}" - parts.append(f"[{opt}]") - else: - # Positional argument - parts.append(self.enrich_text(f"``{action.dest.upper()}``")) - - usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n" - return usage_text - - def _format_action_invocation(self, action): - """Enrich argument names""" - if not action.option_strings: - return self.enrich_text(f"``{action.dest.upper()}``") - - return ", ".join(action.option_strings) diff --git a/tools/docs/lib/latex_fonts.py b/tools/docs/lib/latex_fonts.py deleted file mode 100755 index 29317f8006ea..000000000000 --- a/tools/docs/lib/latex_fonts.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-only -# Copyright (C) Akira Yokosawa, 2024 -# -# Ported to Python by (c) Mauro Carvalho Chehab, 2025 - -""" -Detect problematic Noto CJK variable fonts. - -For "make pdfdocs", reports of build errors of translations.pdf started -arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE -tumbleweed have started deploying variable-font [3] format of "Noto CJK" -fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK -(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which -does not (and likely never will) understand variable fonts for historical -reasons. - -The build error happens even when both of variable- and non-variable-format -fonts are found on the build system. To make matters worse, Fedora enlists -variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, --zh_TW, etc. Hence developers who have interest in CJK pages are more -likely to encounter the build errors. - -This script is invoked from the error path of "make pdfdocs" and emits -suggestions if variable-font files of "Noto CJK" fonts are in the list of -fonts accessible from XeTeX. - -References: -[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ -[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ -[3]: https://en.wikipedia.org/wiki/Variable_font -[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts -[5]: https://build.opensuse.org/request/show/1157217 - -#=========================================================================== -Workarounds for building translations.pdf -#=========================================================================== - -* Denylist "variable font" Noto CJK fonts. - - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with - tweaks if necessary. Remove leading "". - - Path of fontconfig/fonts.conf can be overridden by setting an env - variable FONTS_CONF_DENY_VF. - - * Template: ------------------------------------------------------------------ - - - - - - - - /usr/share/fonts/google-noto-*-cjk-vf-fonts - - /usr/share/fonts/truetype/Noto*CJK*-VF.otf - - - ------------------------------------------------------------------ - - The denylisting is activated for "make pdfdocs". - -* For skipping CJK pages in PDF - - Uninstall texlive-xecjk. - Denylisting is not needed in this case. - -* For printing CJK pages in PDF - - Need non-variable "Noto CJK" fonts. - * Fedora - - google-noto-sans-cjk-fonts - - google-noto-serif-cjk-fonts - * openSUSE tumbleweed - - Non-variable "Noto CJK" fonts are not available as distro packages - as of April, 2024. Fetch a set of font files from upstream Noto - CJK Font released at: - https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc - and at: - https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc - , then uncompress and deploy them. - - Remember to update fontconfig cache by running fc-cache. - -!!! Caution !!! - Uninstalling "variable font" packages can be dangerous. - They might be depended upon by other packages important for your work. - Denylisting should be less invasive, as it is effective only while - XeLaTeX runs in "make pdfdocs". -""" - -import os -import re -import subprocess -import textwrap -import sys - -class LatexFontChecker: - """ - Detect problems with CJK variable fonts that affect PDF builds for - translations. - """ - - def __init__(self, deny_vf=None): - if not deny_vf: - deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") - - self.environ = os.environ.copy() - self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) - - self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") - - def description(self): - return __doc__ - - def get_noto_cjk_vf_fonts(self): - """Get Noto CJK fonts""" - - cjk_fonts = set() - cmd = ["fc-list", ":", "file", "family", "variable"] - try: - result = subprocess.run(cmd,stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - env=self.environ, - check=True) - - except subprocess.CalledProcessError as exc: - sys.exit(f"Error running fc-list: {repr(exc)}") - - for line in result.stdout.splitlines(): - if 'variable=True' not in line: - continue - - match = self.re_cjk.search(line) - if match: - cjk_fonts.add(match.group(1)) - - return sorted(cjk_fonts) - - def check(self): - """Check for problems with CJK fonts""" - - fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") - if not fonts: - return None - - rel_file = os.path.relpath(__file__, os.getcwd()) - - msg = "=" * 77 + "\n" - msg += 'XeTeX is confused by "variable font" files listed below:\n' - msg += fonts + "\n" - msg += textwrap.dedent(f""" - For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. - Or, CJK pages can be skipped by uninstalling texlive-xecjk. - - For more info on denylisting, other options, and variable font, run: - - tools/docs/check-variable-fonts.py -h - """) - msg += "=" * 77 - - return msg diff --git a/tools/docs/lib/parse_data_structs.py b/tools/docs/lib/parse_data_structs.py deleted file mode 100755 index 25361996cd20..000000000000 --- a/tools/docs/lib/parse_data_structs.py +++ /dev/null @@ -1,482 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright (c) 2016-2025 by Mauro Carvalho Chehab . -# pylint: disable=R0912,R0915 - -""" -Parse a source file or header, creating ReStructured Text cross references. - -It accepts an optional file to change the default symbol reference or to -suppress symbols from the output. - -It is capable of identifying defines, functions, structs, typedefs, -enums and enum symbols and create cross-references for all of them. -It is also capable of distinguish #define used for specifying a Linux -ioctl. - -The optional rules file contains a set of rules like: - - ignore ioctl VIDIOC_ENUM_FMT - replace ioctl VIDIOC_DQBUF vidioc_qbuf - replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` -""" - -import os -import re -import sys - - -class ParseDataStructs: - """ - Creates an enriched version of a Kernel header file with cross-links - to each C data structure type. - - It is meant to allow having a more comprehensive documentation, where - uAPI headers will create cross-reference links to the code. - - It is capable of identifying defines, functions, structs, typedefs, - enums and enum symbols and create cross-references for all of them. - It is also capable of distinguish #define used for specifying a Linux - ioctl. - - By default, it create rules for all symbols and defines, but it also - allows parsing an exception file. Such file contains a set of rules - using the syntax below: - - 1. Ignore rules: - - ignore ` - - Removes the symbol from reference generation. - - 2. Replace rules: - - replace - - Replaces how old_symbol with a new reference. The new_reference can be: - - - A simple symbol name; - - A full Sphinx reference. - - 3. Namespace rules - - namespace - - Sets C namespace to be used during cross-reference generation. Can - be overridden by replace rules. - - On ignore and replace rules, can be: - - ioctl: for defines that end with _IO*, e.g. ioctl definitions - - define: for other defines - - symbol: for symbols defined within enums; - - typedef: for typedefs; - - enum: for the name of a non-anonymous enum; - - struct: for structs. - - Examples: - - ignore define __LINUX_MEDIA_H - ignore ioctl VIDIOC_ENUM_FMT - replace ioctl VIDIOC_DQBUF vidioc_qbuf - replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` - - namespace MC - """ - - # Parser regexes with multiple ways to capture enums and structs - RE_ENUMS = [ - re.compile(r"^\s*enum\s+([\w_]+)\s*\{"), - re.compile(r"^\s*enum\s+([\w_]+)\s*$"), - re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"), - re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"), - ] - RE_STRUCTS = [ - re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"), - re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"), - re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"), - re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"), - ] - - # FIXME: the original code was written a long time before Sphinx C - # domain to have multiple namespaces. To avoid to much turn at the - # existing hyperlinks, the code kept using "c:type" instead of the - # right types. To change that, we need to change the types not only - # here, but also at the uAPI media documentation. - DEF_SYMBOL_TYPES = { - "ioctl": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":ref", - "description": "IOCTL Commands", - }, - "define": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":ref", - "description": "Macros and Definitions", - }, - # We're calling each definition inside an enum as "symbol" - "symbol": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":ref", - "description": "Enumeration values", - }, - "typedef": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":c:type", - "description": "Type Definitions", - }, - # This is the description of the enum itself - "enum": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":c:type", - "description": "Enumerations", - }, - "struct": { - "prefix": "\\ ", - "suffix": "\\ ", - "ref_type": ":c:type", - "description": "Structures", - }, - } - - def __init__(self, debug: bool = False): - """Initialize internal vars""" - self.debug = debug - self.data = "" - - self.symbols = {} - - self.namespace = None - self.ignore = [] - self.replace = [] - - for symbol_type in self.DEF_SYMBOL_TYPES: - self.symbols[symbol_type] = {} - - def read_exceptions(self, fname: str): - if not fname: - return - - name = os.path.basename(fname) - - with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f: - for ln, line in enumerate(f): - ln += 1 - line = line.strip() - if not line or line.startswith("#"): - continue - - # ignore rules - match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line) - - if match: - self.ignore.append((ln, match.group(1), match.group(2))) - continue - - # replace rules - match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line) - if match: - self.replace.append((ln, match.group(1), match.group(2), - match.group(3))) - continue - - match = re.match(r"^namespace\s+(\S+)", line) - if match: - self.namespace = match.group(1) - continue - - sys.exit(f"{name}:{ln}: invalid line: {line}") - - def apply_exceptions(self): - """ - Process exceptions file with rules to ignore or replace references. - """ - - # Handle ignore rules - for ln, c_type, symbol in self.ignore: - if c_type not in self.DEF_SYMBOL_TYPES: - sys.exit(f"{name}:{ln}: {c_type} is invalid") - - d = self.symbols[c_type] - if symbol in d: - del d[symbol] - - # Handle replace rules - for ln, c_type, old, new in self.replace: - if c_type not in self.DEF_SYMBOL_TYPES: - sys.exit(f"{name}:{ln}: {c_type} is invalid") - - reftype = None - - # Parse reference type when the type is specified - - match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new) - if match: - reftype = f":c:{match.group(1)}" - new = match.group(2) - else: - match = re.search(r"(\:ref)\:\`(.+)\`", new) - if match: - reftype = match.group(1) - new = match.group(2) - - # If the replacement rule doesn't have a type, get default - if not reftype: - reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type") - if not reftype: - reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type") - - new_ref = f"{reftype}:`{old} <{new}>`" - - # Change self.symbols to use the replacement rule - if old in self.symbols[c_type]: - (_, ln) = self.symbols[c_type][old] - self.symbols[c_type][old] = (new_ref, ln) - else: - print(f"{name}:{ln}: Warning: can't find {old} {c_type}") - - def store_type(self, ln, symbol_type: str, symbol: str, - ref_name: str = None, replace_underscores: bool = True): - """ - Stores a new symbol at self.symbols under symbol_type. - - By default, underscores are replaced by "-" - """ - defs = self.DEF_SYMBOL_TYPES[symbol_type] - - prefix = defs.get("prefix", "") - suffix = defs.get("suffix", "") - ref_type = defs.get("ref_type") - - # Determine ref_link based on symbol type - if ref_type or self.namespace: - if not ref_name: - ref_name = symbol.lower() - - # c-type references don't support hash - if ref_type == ":ref" and replace_underscores: - ref_name = ref_name.replace("_", "-") - - # C domain references may have namespaces - if ref_type.startswith(":c:"): - if self.namespace: - ref_name = f"{self.namespace}.{ref_name}" - - if ref_type: - ref_link = f"{ref_type}:`{symbol} <{ref_name}>`" - else: - ref_link = f"`{symbol} <{ref_name}>`" - else: - ref_link = symbol - - self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln) - - def store_line(self, line): - """Stores a line at self.data, properly indented""" - line = " " + line.expandtabs() - self.data += line.rstrip(" ") - - def parse_file(self, file_in: str, exceptions: str = None): - """Reads a C source file and get identifiers""" - self.data = "" - is_enum = False - is_comment = False - multiline = "" - - self.read_exceptions(exceptions) - - with open(file_in, "r", - encoding="utf-8", errors="backslashreplace") as f: - for line_no, line in enumerate(f): - self.store_line(line) - line = line.strip("\n") - - # Handle continuation lines - if line.endswith(r"\\"): - multiline += line[-1] - continue - - if multiline: - line = multiline + line - multiline = "" - - # Handle comments. They can be multilined - if not is_comment: - if re.search(r"/\*.*", line): - is_comment = True - else: - # Strip C99-style comments - line = re.sub(r"(//.*)", "", line) - - if is_comment: - if re.search(r".*\*/", line): - is_comment = False - else: - multiline = line - continue - - # At this point, line variable may be a multilined statement, - # if lines end with \ or if they have multi-line comments - # With that, it can safely remove the entire comments, - # and there's no need to use re.DOTALL for the logic below - - line = re.sub(r"(/\*.*\*/)", "", line) - if not line.strip(): - continue - - # It can be useful for debug purposes to print the file after - # having comments stripped and multi-lines grouped. - if self.debug > 1: - print(f"line {line_no + 1}: {line}") - - # Now the fun begins: parse each type and store it. - - # We opted for a two parsing logic here due to: - # 1. it makes easier to debug issues not-parsed symbols; - # 2. we want symbol replacement at the entire content, not - # just when the symbol is detected. - - if is_enum: - match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line) - if match: - self.store_type(line_no, "symbol", match.group(1)) - if "}" in line: - is_enum = False - continue - - match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line) - if match: - self.store_type(line_no, "ioctl", match.group(1), - replace_underscores=False) - continue - - match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line) - if match: - self.store_type(line_no, "define", match.group(1)) - continue - - match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);", - line) - if match: - name = match.group(2).strip() - symbol = match.group(3) - self.store_type(line_no, "typedef", symbol, ref_name=name) - continue - - for re_enum in self.RE_ENUMS: - match = re_enum.match(line) - if match: - self.store_type(line_no, "enum", match.group(1)) - is_enum = True - break - - for re_struct in self.RE_STRUCTS: - match = re_struct.match(line) - if match: - self.store_type(line_no, "struct", match.group(1)) - break - - self.apply_exceptions() - - def debug_print(self): - """ - Print debug information containing the replacement rules per symbol. - To make easier to check, group them per type. - """ - if not self.debug: - return - - for c_type, refs in self.symbols.items(): - if not refs: # Skip empty dictionaries - continue - - print(f"{c_type}:") - - for symbol, (ref, ln) in sorted(refs.items()): - print(f" #{ln:<5d} {symbol} -> {ref}") - - print() - - def gen_output(self): - """Write the formatted output to a file.""" - - # Avoid extra blank lines - text = re.sub(r"\s+$", "", self.data) + "\n" - text = re.sub(r"\n\s+\n", "\n\n", text) - - # Escape Sphinx special characters - text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text) - - # Source uAPI files may have special notes. Use bold font for them - text = re.sub(r"DEPRECATED", "**DEPRECATED**", text) - - # Delimiters to catch the entire symbol after escaped - start_delim = r"([ \n\t\(=\*\@])" - end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)" - - # Process all reference types - for ref_dict in self.symbols.values(): - for symbol, (replacement, _) in ref_dict.items(): - symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol)) - text = re.sub(fr'{start_delim}{symbol}{end_delim}', - fr'\1{replacement}\2', text) - - # Remove "\ " where not needed: before spaces and at the end of lines - text = re.sub(r"\\ ([\n ])", r"\1", text) - text = re.sub(r" \\ ", " ", text) - - return text - - def gen_toc(self): - """ - Create a list of symbols to be part of a TOC contents table - """ - text = [] - - # Sort symbol types per description - symbol_descriptions = [] - for k, v in self.DEF_SYMBOL_TYPES.items(): - symbol_descriptions.append((v['description'], k)) - - symbol_descriptions.sort() - - # Process each category - for description, c_type in symbol_descriptions: - - refs = self.symbols[c_type] - if not refs: # Skip empty categories - continue - - text.append(f"{description}") - text.append("-" * len(description)) - text.append("") - - # Sort symbols alphabetically - for symbol, (ref, ln) in sorted(refs.items()): - text.append(f"- LINENO_{ln}: {ref}") - - text.append("") # Add empty line between categories - - return "\n".join(text) - - def write_output(self, file_in: str, file_out: str, toc: bool): - title = os.path.basename(file_in) - - if toc: - text = self.gen_toc() - else: - text = self.gen_output() - - with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f: - f.write(".. -*- coding: utf-8; mode: rst -*-\n\n") - f.write(f"{title}\n") - f.write("=" * len(title) + "\n\n") - - if not toc: - f.write(".. parsed-literal::\n\n") - - f.write(text) diff --git a/tools/docs/lib/python_version.py b/tools/docs/lib/python_version.py deleted file mode 100644 index 4fde1b882164..000000000000 --- a/tools/docs/lib/python_version.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-or-later -# Copyright (c) 2017-2025 Mauro Carvalho Chehab - -""" -Handle Python version check logic. - -Not all Python versions are supported by scripts. Yet, on some cases, -like during documentation build, a newer version of python could be -available. - -This class allows checking if the minimal requirements are followed. - -Better than that, PythonVersion.check_python() not only checks the minimal -requirements, but it automatically switches to a the newest available -Python version if present. - -""" - -import os -import re -import subprocess -import shlex -import sys - -from glob import glob -from textwrap import indent - -class PythonVersion: - """ - Ancillary methods that checks for missing dependencies for different - types of types, like binaries, python modules, rpm deps, etc. - """ - - def __init__(self, version): - """Ïnitialize self.version tuple from a version string""" - self.version = self.parse_version(version) - - @staticmethod - def parse_version(version): - """Convert a major.minor.patch version into a tuple""" - return tuple(int(x) for x in version.split(".")) - - @staticmethod - def ver_str(version): - """Returns a version tuple as major.minor.patch""" - return ".".join([str(x) for x in version]) - - @staticmethod - def cmd_print(cmd, max_len=80): - cmd_line = [] - - for w in cmd: - w = shlex.quote(w) - - if cmd_line: - if not max_len or len(cmd_line[-1]) + len(w) < max_len: - cmd_line[-1] += " " + w - continue - else: - cmd_line[-1] += " \\" - cmd_line.append(w) - else: - cmd_line.append(w) - - return "\n ".join(cmd_line) - - def __str__(self): - """Returns a version tuple as major.minor.patch from self.version""" - return self.ver_str(self.version) - - @staticmethod - def get_python_version(cmd): - """ - Get python version from a Python binary. As we need to detect if - are out there newer python binaries, we can't rely on sys.release here. - """ - - kwargs = {} - if sys.version_info < (3, 7): - kwargs['universal_newlines'] = True - else: - kwargs['text'] = True - - result = subprocess.run([cmd, "--version"], - stdout = subprocess.PIPE, - stderr = subprocess.PIPE, - **kwargs, check=False) - - version = result.stdout.strip() - - match = re.search(r"(\d+\.\d+\.\d+)", version) - if match: - return PythonVersion.parse_version(match.group(1)) - - print(f"Can't parse version {version}") - return (0, 0, 0) - - @staticmethod - def find_python(min_version): - """ - Detect if are out there any python 3.xy version newer than the - current one. - - Note: this routine is limited to up to 2 digits for python3. We - may need to update it one day, hopefully on a distant future. - """ - patterns = [ - "python3.[0-9][0-9]", - "python3.[0-9]", - ] - - python_cmd = [] - - # Seek for a python binary newer than min_version - for path in os.getenv("PATH", "").split(":"): - for pattern in patterns: - for cmd in glob(os.path.join(path, pattern)): - if os.path.isfile(cmd) and os.access(cmd, os.X_OK): - version = PythonVersion.get_python_version(cmd) - if version >= min_version: - python_cmd.append((version, cmd)) - - return sorted(python_cmd, reverse=True) - - @staticmethod - def check_python(min_version, show_alternatives=False, bail_out=False, - success_on_error=False): - """ - Check if the current python binary satisfies our minimal requirement - for Sphinx build. If not, re-run with a newer version if found. - """ - cur_ver = sys.version_info[:3] - if cur_ver >= min_version: - ver = PythonVersion.ver_str(cur_ver) - return - - python_ver = PythonVersion.ver_str(cur_ver) - - available_versions = PythonVersion.find_python(min_version) - if not available_versions: - print(f"ERROR: Python version {python_ver} is not spported anymore\n") - print(" Can't find a new version. This script may fail") - return - - script_path = os.path.abspath(sys.argv[0]) - - # Check possible alternatives - if available_versions: - new_python_cmd = available_versions[0][1] - else: - new_python_cmd = None - - if show_alternatives and available_versions: - print("You could run, instead:") - for _, cmd in available_versions: - args = [cmd, script_path] + sys.argv[1:] - - cmd_str = indent(PythonVersion.cmd_print(args), " ") - print(f"{cmd_str}\n") - - if bail_out: - msg = f"Python {python_ver} not supported. Bailing out" - if success_on_error: - print(msg, file=sys.stderr) - sys.exit(0) - else: - sys.exit(msg) - - print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") - - # Restart script using the newer version - args = [new_python_cmd, script_path] + sys.argv[1:] - - try: - os.execv(new_python_cmd, args) - except OSError as e: - sys.exit(f"Failed to restart with {new_python_cmd}: {e}") diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py index 6716c7300258..ed9cf2bf22de 100755 --- a/tools/docs/parse-headers.py +++ b/tools/docs/parse-headers.py @@ -24,10 +24,13 @@ The optional ``FILE_RULES`` contains a set of rules like: replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` """ -import argparse +import argparse, sys +import os.path -from lib.parse_data_structs import ParseDataStructs -from lib.enrich_formatter import EnrichFormatter +src_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) +from parse_data_structs import ParseDataStructs +from enrich_formatter import EnrichFormatter def main(): """Main function""" diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index 1efaca3d16aa..ce0b1b5292da 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -56,14 +56,15 @@ import sys from concurrent import futures from glob import glob -from lib.python_version import PythonVersion -from lib.latex_fonts import LatexFontChecker -LIB_DIR = "../../scripts/lib" +LIB_DIR = "../lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) +sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR + '/kdoc')) # temporary +from python_version import PythonVersion +from latex_fonts import LatexFontChecker from jobserver import JobserverExec # pylint: disable=C0413,C0411,E0401 # diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install index 647e1f60357f..d8c9fb76948d 100755 --- a/tools/docs/sphinx-pre-install +++ b/tools/docs/sphinx-pre-install @@ -32,8 +32,11 @@ import re import subprocess import sys from glob import glob +import os.path -from lib.python_version import PythonVersion +src_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) +from python_version import PythonVersion RECOMMENDED_VERSION = PythonVersion("3.4.3").version MIN_PYTHON_VERSION = PythonVersion("3.7").version diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py new file mode 100644 index 000000000000..66a738013ce1 --- /dev/null +++ b/tools/lib/python/abi/abi_parser.py @@ -0,0 +1,628 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. +""" + +from argparse import Namespace +import logging +import os +import re + +from pprint import pformat +from random import randrange, seed + +# Import Python modules + +from helpers import AbiDebug, ABI_DIR + + +class AbiParser: + """Main class to parse ABI files""" + + TAGS = r"(what|where|date|kernelversion|contact|description|users)" + XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)" + + def __init__(self, directory, logger=None, + enable_lineno=False, show_warnings=True, debug=0): + """Stores arguments for the class and initialize class vars""" + + self.directory = directory + self.enable_lineno = enable_lineno + self.show_warnings = show_warnings + self.debug = debug + + if not logger: + self.log = logging.getLogger("get_abi") + else: + self.log = logger + + self.data = {} + self.what_symbols = {} + self.file_refs = {} + self.what_refs = {} + + # Ignore files that contain such suffixes + self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~") + + # Regular expressions used on parser + self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR) + self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I) + self.re_valid = re.compile(self.TAGS) + self.re_start_spc = re.compile(r"(\s*)(\S.*)") + self.re_whitespace = re.compile(r"^\s+") + + # Regular used on print + self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})") + self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])") + self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)") + self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n") + self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst") + self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)") + self.re_xref_node = re.compile(self.XREF) + + def warn(self, fdata, msg, extra=None): + """Displays a parse error if warning is enabled""" + + if not self.show_warnings: + return + + msg = f"{fdata.fname}:{fdata.ln}: {msg}" + if extra: + msg += "\n\t\t" + extra + + self.log.warning(msg) + + def add_symbol(self, what, fname, ln=None, xref=None): + """Create a reference table describing where each 'what' is located""" + + if what not in self.what_symbols: + self.what_symbols[what] = {"file": {}} + + if fname not in self.what_symbols[what]["file"]: + self.what_symbols[what]["file"][fname] = [] + + if ln and ln not in self.what_symbols[what]["file"][fname]: + self.what_symbols[what]["file"][fname].append(ln) + + if xref: + self.what_symbols[what]["xref"] = xref + + def _parse_line(self, fdata, line): + """Parse a single line of an ABI file""" + + new_what = False + new_tag = False + content = None + + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + sep = match.group(2) + content = match.group(3) + + match = self.re_valid.search(new) + if match: + new_tag = match.group(1) + else: + if fdata.tag == "description": + # New "tag" is actually part of description. + # Don't consider it a tag + new_tag = False + elif fdata.tag != "": + self.warn(fdata, f"tag '{fdata.tag}' is invalid", line) + + if new_tag: + # "where" is Invalid, but was a common mistake. Warn if found + if new_tag == "where": + self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead") + new_tag = "what" + + if new_tag == "what": + fdata.space = None + + if content not in self.what_symbols: + self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln) + + if fdata.tag == "what": + fdata.what.append(content.strip("\n")) + else: + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fdata.fname, + ln=fdata.what_ln, xref=fdata.key) + + fdata.label = content + new_what = True + + key = "abi_" + content.lower() + fdata.key = self.re_unprintable.sub("_", key).strip("_") + + # Avoid duplicated keys but using a defined seed, to make + # the namespace identical if there aren't changes at the + # ABI symbols + seed(42) + + while fdata.key in self.data: + char = randrange(0, 51) + ord("A") + if char > ord("Z"): + char += ord("a") - ord("Z") - 1 + + fdata.key += chr(char) + + if fdata.key and fdata.key not in self.data: + self.data[fdata.key] = { + "what": [content], + "file": [fdata.file_ref], + "path": fdata.ftype, + "line_no": fdata.ln, + } + + fdata.what = self.data[fdata.key]["what"] + + self.what_refs[content] = fdata.key + fdata.tag = new_tag + fdata.what_ln = fdata.ln + + if fdata.nametag["what"]: + t = (content, fdata.key) + if t not in fdata.nametag["symbols"]: + fdata.nametag["symbols"].append(t) + + return + + if fdata.tag and new_tag: + fdata.tag = new_tag + + if new_what: + fdata.label = "" + + if "description" in self.data[fdata.key]: + self.data[fdata.key]["description"] += "\n\n" + + if fdata.file_ref not in self.data[fdata.key]["file"]: + self.data[fdata.key]["file"].append(fdata.file_ref) + + if self.debug == AbiDebug.WHAT_PARSING: + self.log.debug("what: %s", fdata.what) + + if not fdata.what: + self.warn(fdata, "'What:' should come first:", line) + return + + if new_tag == "description": + fdata.space = None + + if content: + sep = sep.replace(":", " ") + + c = " " * len(new_tag) + sep + content + c = c.expandtabs() + + match = self.re_start_spc.match(c) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + content = match.group(2) + "\n" + + self.data[fdata.key][fdata.tag] = content + + return + + # Store any contents before tags at the database + if not fdata.tag and "what" in fdata.nametag: + fdata.nametag["description"] += line + return + + if fdata.tag == "description": + content = line.expandtabs() + + if self.re_whitespace.sub("", content) == "": + self.data[fdata.key][fdata.tag] += "\n" + return + + if fdata.space is None: + match = self.re_start_spc.match(content) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + + content = match.group(2) + "\n" + else: + if content.startswith(fdata.space): + content = content[len(fdata.space):] + + else: + fdata.space = "" + + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += content + return + + content = line.strip() + if fdata.tag: + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n") + return + + # Everything else is error + if content: + self.warn(fdata, "Unexpected content", line) + + def parse_readme(self, nametag, fname): + """Parse ABI README file""" + + nametag["what"] = ["Introduction"] + nametag["path"] = "README" + with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + + match = self.re_valid.search(new) + if match: + nametag["description"] += "\n:" + line + continue + + nametag["description"] += line + + def parse_file(self, fname, path, basename): + """Parse a single file""" + + ref = f"abi_file_{path}_{basename}" + ref = self.re_unprintable.sub("_", ref).strip("_") + + # Store per-file state into a namespace variable. This will be used + # by the per-line parser state machine and by the warning function. + fdata = Namespace + + fdata.fname = fname + fdata.name = basename + + pos = fname.find(ABI_DIR) + if pos > 0: + f = fname[pos:] + else: + f = fname + + fdata.file_ref = (f, ref) + self.file_refs[f] = ref + + fdata.ln = 0 + fdata.what_ln = 0 + fdata.tag = "" + fdata.label = "" + fdata.what = [] + fdata.key = None + fdata.xrefs = None + fdata.space = None + fdata.ftype = path.split("/")[0] + + fdata.nametag = {} + fdata.nametag["what"] = [f"ABI file {path}/{basename}"] + fdata.nametag["type"] = "File" + fdata.nametag["path"] = fdata.ftype + fdata.nametag["file"] = [fdata.file_ref] + fdata.nametag["line_no"] = 1 + fdata.nametag["description"] = "" + fdata.nametag["symbols"] = [] + + self.data[ref] = fdata.nametag + + if self.debug & AbiDebug.WHAT_OPEN: + self.log.debug("Opening file %s", fname) + + if basename == "README": + self.parse_readme(fdata.nametag, fname) + return + + with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + fdata.ln += 1 + + self._parse_line(fdata, line) + + if "description" in fdata.nametag: + fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n") + + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fname, xref=fdata.key) + + def _parse_abi(self, root=None): + """Internal function to parse documentation ABI recursively""" + + if not root: + root = self.directory + + with os.scandir(root) as obj: + for entry in obj: + name = os.path.join(root, entry.name) + + if entry.is_dir(): + self._parse_abi(name) + continue + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if basename.startswith("."): + continue + + if basename.endswith(self.ignore_suffixes): + continue + + path = self.re_abi_dir.sub("", os.path.dirname(name)) + + self.parse_file(name, path, basename) + + def parse_abi(self, root=None): + """Parse documentation ABI""" + + self._parse_abi(root) + + if self.debug & AbiDebug.DUMP_ABI_STRUCTS: + self.log.debug(pformat(self.data)) + + def desc_txt(self, desc): + """Print description as found inside ABI files""" + + desc = desc.strip(" \t\n") + + return desc + "\n\n" + + def xref(self, fname): + """ + Converts a Documentation/ABI + basename into a ReST cross-reference + """ + + xref = self.file_refs.get(fname) + if not xref: + return None + else: + return xref + + def desc_rst(self, desc): + """Enrich ReST output by creating cross-references""" + + # Remove title markups from the description + # Having titles inside ABI files will only work if extra + # care would be taken in order to strictly follow the same + # level order for each markup. + desc = self.re_title_mark.sub("\n\n", "\n" + desc) + desc = desc.rstrip(" \t\n").lstrip("\n") + + # Python's regex performance for non-compiled expressions is a lot + # than Perl, as Perl automatically caches them at their + # first usage. Here, we'll need to do the same, as otherwise the + # performance penalty is be high + + new_desc = "" + for d in desc.split("\n"): + if d == "": + new_desc += "\n" + continue + + # Use cross-references for doc files where needed + d = self.re_doc.sub(r":doc:`/\1`", d) + + # Use cross-references for ABI generated docs where needed + matches = self.re_abi.findall(d) + for m in matches: + abi = m[0] + m[1] + + xref = self.file_refs.get(abi) + if not xref: + # This may happen if ABI is on a separate directory, + # like parsing ABI testing and symbol is at stable. + # The proper solution is to move this part of the code + # for it to be inside sphinx/kernel_abi.py + self.log.info("Didn't find ABI reference for '%s'", abi) + else: + new = self.re_escape.sub(r"\\\1", m[1]) + d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d) + + # Seek for cross reference symbols like /sys/... + # Need to be careful to avoid doing it on a code block + if d[0] not in [" ", "\t"]: + matches = self.re_xref_node.findall(d) + for m in matches: + # Finding ABI here is more complex due to wildcards + xref = self.what_refs.get(m) + if xref: + new = self.re_escape.sub(r"\\\1", m) + d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d) + + new_desc += d + "\n" + + return new_desc + "\n\n" + + def doc(self, output_in_txt=False, show_symbols=True, show_file=True, + filter_path=None): + """Print ABI at stdout""" + + part = None + for key, v in sorted(self.data.items(), + key=lambda x: (x[1].get("type", ""), + x[1].get("what"))): + + wtype = v.get("type", "Symbol") + file_ref = v.get("file") + names = v.get("what", [""]) + + if wtype == "File": + if not show_file: + continue + else: + if not show_symbols: + continue + + if filter_path: + if v.get("path") != filter_path: + continue + + msg = "" + + if wtype != "File": + cur_part = names[0] + if cur_part.find("/") >= 0: + match = self.re_what.match(cur_part) + if match: + symbol = match.group(1).rstrip("/") + cur_part = "Symbols under " + symbol + + if cur_part and cur_part != part: + part = cur_part + msg += part + "\n"+ "-" * len(part) +"\n\n" + + msg += f".. _{key}:\n\n" + + max_len = 0 + for i in range(0, len(names)): # pylint: disable=C0200 + names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**" + + max_len = max(max_len, len(names[i])) + + msg += "+-" + "-" * max_len + "-+\n" + for name in names: + msg += f"| {name}" + " " * (max_len - len(name)) + " |\n" + msg += "+-" + "-" * max_len + "-+\n" + msg += "\n" + + for ref in file_ref: + if wtype == "File": + msg += f".. _{ref[1]}:\n\n" + else: + base = os.path.basename(ref[0]) + msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n" + + if wtype == "File": + msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n" + + desc = v.get("description") + if not desc and wtype != "File": + msg += f"DESCRIPTION MISSING for {names[0]}\n\n" + + if desc: + if output_in_txt: + msg += self.desc_txt(desc) + else: + msg += self.desc_rst(desc) + + symbols = v.get("symbols") + if symbols: + msg += "Has the following ABI:\n\n" + + for w, label in symbols: + # Escape special chars from content + content = self.re_escape.sub(r"\\\1", w) + + msg += f"- :ref:`{content} <{label}>`\n\n" + + users = v.get("users") + if users and users.strip(" \t\n"): + users = users.strip("\n").replace('\n', '\n\t') + msg += f"Users:\n\t{users}\n\n" + + ln = v.get("line_no", 1) + + yield (msg, file_ref[0][0], ln) + + def check_issues(self): + """Warn about duplicated ABI entries""" + + for what, v in self.what_symbols.items(): + files = v.get("file") + if not files: + # Should never happen if the parser works properly + self.log.warning("%s doesn't have a file associated", what) + continue + + if len(files) == 1: + continue + + f = [] + for fname, lines in sorted(files.items()): + if not lines: + f.append(f"{fname}") + elif len(lines) == 1: + f.append(f"{fname}:{lines[0]}") + else: + m = fname + "lines " + m += ", ".join(str(x) for x in lines) + f.append(m) + + self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f)) + + def search_symbols(self, expr): + """ Searches for ABI symbols """ + + regex = re.compile(expr, re.I) + + found_keys = 0 + for t in sorted(self.data.items(), key=lambda x: [0]): + v = t[1] + + wtype = v.get("type", "") + if wtype == "File": + continue + + for what in v.get("what", [""]): + if regex.search(what): + found_keys += 1 + + kernelversion = v.get("kernelversion", "").strip(" \t\n") + date = v.get("date", "").strip(" \t\n") + contact = v.get("contact", "").strip(" \t\n") + users = v.get("users", "").strip(" \t\n") + desc = v.get("description", "").strip(" \t\n") + + files = [] + for f in v.get("file", ()): + files.append(f[0]) + + what = str(found_keys) + ". " + what + title_tag = "-" * len(what) + + print(f"\n{what}\n{title_tag}\n") + + if kernelversion: + print(f"Kernel version:\t\t{kernelversion}") + + if date: + print(f"Date:\t\t\t{date}") + + if contact: + print(f"Contact:\t\t{contact}") + + if users: + print(f"Users:\t\t\t{users}") + + print("Defined on file(s):\t" + ", ".join(files)) + + if desc: + desc = desc.strip("\n") + print(f"\n{desc}\n") + + if not found_keys: + print(f"Regular expression /{expr}/ not found.") diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py new file mode 100644 index 000000000000..8a57846cbc69 --- /dev/null +++ b/tools/lib/python/abi/abi_regex.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python3 +# xxpylint: disable=R0903 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# SPDX-License-Identifier: GPL-2.0 + +""" +Convert ABI what into regular expressions +""" + +import re +import sys + +from pprint import pformat + +from abi_parser import AbiParser +from helpers import AbiDebug + +class AbiRegex(AbiParser): + """Extends AbiParser to search ABI nodes with regular expressions""" + + # Escape only ASCII visible characters + escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])" + leave_others = "others" + + # Tuples with regular expressions to be compiled and replacement data + re_whats = [ + # Drop escape characters that might exist + (re.compile("\\\\"), ""), + + # Temporarily escape dot characters + (re.compile(r"\."), "\xf6"), + + # Temporarily change [0-9]+ type of patterns + (re.compile(r"\[0\-9\]\+"), "\xff"), + + # Temporarily change [\d+-\d+] type of patterns + (re.compile(r"\[0\-\d+\]"), "\xff"), + (re.compile(r"\[0:\d+\]"), "\xff"), + (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"), + + # Temporarily change [0-9] type of patterns + (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"), + + # Handle multiple option patterns + (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"), + + # Handle wildcards + (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"), + (re.compile(r"/\*/"), "/.*/"), + (re.compile(r"/\xf6\xf6\xf6"), "/.*"), + (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"), + (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"), + (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"), + + (re.compile(r"XX+"), "\\\\w\xf7"), + (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"), + (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"), + (re.compile(r"_[AB]_"), "_\\\\w\xf7_"), + + # Recover [0-9] type of patterns + (re.compile(r"\xf4"), "["), + (re.compile(r"\xf5"), "]"), + + # Remove duplicated spaces + (re.compile(r"\s+"), r" "), + + # Special case: drop comparison as in: + # What: foo = + # (this happens on a few IIO definitions) + (re.compile(r"\s*\=.*$"), ""), + + # Escape all other symbols + (re.compile(escape_symbols), r"\\\1"), + (re.compile(r"\\\\"), r"\\"), + (re.compile(r"\\([\[\]\(\)\|])"), r"\1"), + (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"), + + (re.compile(r"\xff"), r"\\d+"), + + # Special case: IIO ABI which a parenthesis. + (re.compile(r"sqrt(.*)"), r"sqrt(.*)"), + + # Simplify regexes with multiple .* + (re.compile(r"(?:\.\*){2,}"), ""), + + # Recover dot characters + (re.compile(r"\xf6"), "\\."), + # Recover plus characters + (re.compile(r"\xf7"), "+"), + ] + re_has_num = re.compile(r"\\d") + + # Symbol name after escape_chars that are considered a devnode basename + re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$") + + # List of popular group names to be skipped to minimize regex group size + # Use AbiDebug.SUBGROUP_SIZE to detect those + skip_names = set(["devices", "hwmon"]) + + def regex_append(self, what, new): + """ + Get a search group for a subset of regular expressions. + + As ABI may have thousands of symbols, using a for to search all + regular expressions is at least O(n^2). When there are wildcards, + the complexity increases substantially, eventually becoming exponential. + + To avoid spending too much time on them, use a logic to split + them into groups. The smaller the group, the better, as it would + mean that searches will be confined to a small number of regular + expressions. + + The conversion to a regex subset is tricky, as we need something + that can be easily obtained from the sysfs symbol and from the + regular expression. So, we need to discard nodes that have + wildcards. + + If it can't obtain a subgroup, place the regular expression inside + a special group (self.leave_others). + """ + + search_group = None + + for search_group in reversed(new.split("/")): + if not search_group or search_group in self.skip_names: + continue + if self.re_symbol_name.match(search_group): + break + + if not search_group: + search_group = self.leave_others + + if self.debug & AbiDebug.SUBGROUP_MAP: + self.log.debug("%s: mapped as %s", what, search_group) + + try: + if search_group not in self.regex_group: + self.regex_group[search_group] = [] + + self.regex_group[search_group].append(re.compile(new)) + if self.search_string: + if what.find(self.search_string) >= 0: + print(f"What: {what}") + except re.PatternError: + self.log.warning("Ignoring '%s' as it produced an invalid regex:\n" + " '%s'", what, new) + + def get_regexes(self, what): + """ + Given an ABI devnode, return a list of all regular expressions that + may match it, based on the sub-groups created by regex_append() + """ + + re_list = [] + + patches = what.split("/") + patches.reverse() + patches.append(self.leave_others) + + for search_group in patches: + if search_group in self.regex_group: + re_list += self.regex_group[search_group] + + return re_list + + def __init__(self, *args, **kwargs): + """ + Override init method to get verbose argument + """ + + self.regex_group = None + self.search_string = None + self.re_string = None + + if "search_string" in kwargs: + self.search_string = kwargs.get("search_string") + del kwargs["search_string"] + + if self.search_string: + + try: + self.re_string = re.compile(self.search_string) + except re.PatternError as e: + msg = f"{self.search_string} is not a valid regular expression" + raise ValueError(msg) from e + + super().__init__(*args, **kwargs) + + def parse_abi(self, *args, **kwargs): + + super().parse_abi(*args, **kwargs) + + self.regex_group = {} + + print("Converting ABI What fields into regexes...", file=sys.stderr) + + for t in sorted(self.data.items(), key=lambda x: x[0]): + v = t[1] + if v.get("type") == "File": + continue + + v["regex"] = [] + + for what in v.get("what", []): + if not what.startswith("/sys"): + continue + + new = what + for r, s in self.re_whats: + try: + new = r.sub(s, new) + except re.PatternError as e: + # Help debugging troubles with new regexes + raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e + + v["regex"].append(new) + + if self.debug & AbiDebug.REGEX: + self.log.debug("%-90s <== %s", new, what) + + # Store regex into a subgroup to speedup searches + self.regex_append(what, new) + + if self.debug & AbiDebug.SUBGROUP_DICT: + self.log.debug("%s", pformat(self.regex_group)) + + if self.debug & AbiDebug.SUBGROUP_SIZE: + biggestd_keys = sorted(self.regex_group.keys(), + key= lambda k: len(self.regex_group[k]), + reverse=True) + + print("Top regex subgroups:", file=sys.stderr) + for k in biggestd_keys[:10]: + print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr) diff --git a/tools/lib/python/abi/helpers.py b/tools/lib/python/abi/helpers.py new file mode 100644 index 000000000000..639b23e4ca33 --- /dev/null +++ b/tools/lib/python/abi/helpers.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# pylint: disable=R0903 +# SPDX-License-Identifier: GPL-2.0 + +""" +Helper classes for ABI parser +""" + +ABI_DIR = "Documentation/ABI/" + + +class AbiDebug: + """Debug levels""" + + WHAT_PARSING = 1 + WHAT_OPEN = 2 + DUMP_ABI_STRUCTS = 4 + UNDEFINED = 8 + REGEX = 16 + SUBGROUP_MAP = 32 + SUBGROUP_DICT = 64 + SUBGROUP_SIZE = 128 + GRAPH = 256 + + +DEBUG_HELP = """ +1 - enable debug parsing logic +2 - enable debug messages on file open +4 - enable debug for ABI parse data +8 - enable extra debug information to identify troubles + with ABI symbols found at the local machine that + weren't found on ABI documentation (used only for + undefined subcommand) +16 - enable debug for what to regex conversion +32 - enable debug for symbol regex subgroups +64 - enable debug for sysfs graph tree variable +""" diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py new file mode 100644 index 000000000000..f15c94a6e33c --- /dev/null +++ b/tools/lib/python/abi/system_symbols.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0912,R0914,R0915,R1702 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. +""" + +import os +import re +import sys + +from concurrent import futures +from datetime import datetime +from random import shuffle + +from helpers import AbiDebug + +class SystemSymbols: + """Stores arguments for the class and initialize class vars""" + + def graph_add_file(self, path, link=None): + """ + add a file path to the sysfs graph stored at self.root + """ + + if path in self.files: + return + + name = "" + ref = self.root + for edge in path.split("/"): + name += edge + "/" + if edge not in ref: + ref[edge] = {"__name": [name.rstrip("/")]} + + ref = ref[edge] + + if link and link not in ref["__name"]: + ref["__name"].append(link.rstrip("/")) + + self.files.add(path) + + def print_graph(self, root_prefix="", root=None, level=0): + """Prints a reference tree graph using UTF-8 characters""" + + if not root: + root = self.root + level = 0 + + # Prevent endless traverse + if level > 5: + return + + if level > 0: + prefix = "├──" + last_prefix = "└──" + else: + prefix = "" + last_prefix = "" + + items = list(root.items()) + + names = root.get("__name", []) + for k, edge in items: + if k == "__name": + continue + + if not k: + k = "/" + + if len(names) > 1: + k += " links: " + ",".join(names[1:]) + + if edge == items[-1][1]: + print(root_prefix + last_prefix + k) + p = root_prefix + if level > 0: + p += " " + self.print_graph(p, edge, level + 1) + else: + print(root_prefix + prefix + k) + p = root_prefix + "│ " + self.print_graph(p, edge, level + 1) + + def _walk(self, root): + """ + Walk through sysfs to get all devnodes that aren't ignored. + + By default, uses /sys as sysfs mounting point. If another + directory is used, it replaces them to /sys at the patches. + """ + + with os.scandir(root) as obj: + for entry in obj: + path = os.path.join(root, entry.name) + if self.sysfs: + p = path.replace(self.sysfs, "/sys", count=1) + else: + p = path + + if self.re_ignore.search(p): + return + + # Handle link first to avoid directory recursion + if entry.is_symlink(): + real = os.path.realpath(path) + if not self.sysfs: + self.aliases[path] = real + else: + real = real.replace(self.sysfs, "/sys", count=1) + + # Add absfile location to graph if it doesn't exist + if not self.re_ignore.search(real): + # Add link to the graph + self.graph_add_file(real, p) + + elif entry.is_file(): + self.graph_add_file(p) + + elif entry.is_dir(): + self._walk(path) + + def __init__(self, abi, sysfs="/sys", hints=False): + """ + Initialize internal variables and get a list of all files inside + sysfs that can currently be parsed. + + Please notice that there are several entries on sysfs that aren't + documented as ABI. Ignore those. + + The real paths will be stored under self.files. Aliases will be + stored in separate, as self.aliases. + """ + + self.abi = abi + self.log = abi.log + + if sysfs != "/sys": + self.sysfs = sysfs.rstrip("/") + else: + self.sysfs = None + + self.hints = hints + + self.root = {} + self.aliases = {} + self.files = set() + + dont_walk = [ + # Those require root access and aren't documented at ABI + f"^{sysfs}/kernel/debug", + f"^{sysfs}/kernel/tracing", + f"^{sysfs}/fs/pstore", + f"^{sysfs}/fs/bpf", + f"^{sysfs}/fs/fuse", + + # This is not documented at ABI + f"^{sysfs}/module", + + f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI + f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings + "sections|notes", # aren't actually part of ABI + + # kernel-parameters.txt - not easy to parse + "parameters", + ] + + self.re_ignore = re.compile("|".join(dont_walk)) + + print(f"Reading {sysfs} directory contents...", file=sys.stderr) + self._walk(sysfs) + + def check_file(self, refs, found): + """Check missing ABI symbols for a given sysfs file""" + + res_list = [] + + try: + for names in refs: + fname = names[0] + + res = { + "found": False, + "fname": fname, + "msg": "", + } + res_list.append(res) + + re_what = self.abi.get_regexes(fname) + if not re_what: + self.abi.log.warning(f"missing rules for {fname}") + continue + + for name in names: + for r in re_what: + if self.abi.debug & AbiDebug.UNDEFINED: + self.log.debug("check if %s matches '%s'", name, r.pattern) + if r.match(name): + res["found"] = True + if found: + res["msg"] += f" {fname}: regex:\n\t" + continue + + if self.hints and not res["found"]: + res["msg"] += f" {fname} not found. Tested regexes:\n" + for r in re_what: + res["msg"] += " " + r.pattern + "\n" + + except KeyboardInterrupt: + pass + + return res_list + + def _ref_interactor(self, root): + """Recursive function to interact over the sysfs tree""" + + for k, v in root.items(): + if isinstance(v, dict): + yield from self._ref_interactor(v) + + if root == self.root or k == "__name": + continue + + if self.abi.re_string: + fname = v["__name"][0] + if self.abi.re_string.search(fname): + yield v + else: + yield v + + + def get_fileref(self, all_refs, chunk_size): + """Interactor to group refs into chunks""" + + n = 0 + refs = [] + + for ref in all_refs: + refs.append(ref) + + n += 1 + if n >= chunk_size: + yield refs + n = 0 + refs = [] + + yield refs + + def check_undefined_symbols(self, max_workers=None, chunk_size=50, + found=None, dry_run=None): + """Seach ABI for sysfs symbols missing documentation""" + + self.abi.parse_abi() + + if self.abi.debug & AbiDebug.GRAPH: + self.print_graph() + + all_refs = [] + for ref in self._ref_interactor(self.root): + all_refs.append(ref["__name"]) + + if dry_run: + print("Would check", file=sys.stderr) + for ref in all_refs: + print(", ".join(ref)) + + return + + print("Starting to search symbols (it may take several minutes):", + file=sys.stderr) + start = datetime.now() + old_elapsed = None + + # Python doesn't support multithreading due to limitations on its + # global lock (GIL). While Python 3.13 finally made GIL optional, + # there are still issues related to it. Also, we want to have + # backward compatibility with older versions of Python. + # + # So, use instead multiprocess. However, Python is very slow passing + # data from/to multiple processes. Also, it may consume lots of memory + # if the data to be shared is not small. So, we need to group workload + # in chunks that are big enough to generate performance gains while + # not being so big that would cause out-of-memory. + + num_refs = len(all_refs) + print(f"Number of references to parse: {num_refs}", file=sys.stderr) + + if not max_workers: + max_workers = os.cpu_count() + elif max_workers > os.cpu_count(): + max_workers = os.cpu_count() + + max_workers = max(max_workers, 1) + + max_chunk_size = int((num_refs + max_workers - 1) / max_workers) + chunk_size = min(chunk_size, max_chunk_size) + chunk_size = max(1, chunk_size) + + if max_workers > 1: + executor = futures.ProcessPoolExecutor + + # Place references in a random order. This may help improving + # performance, by mixing complex/simple expressions when creating + # chunks + shuffle(all_refs) + else: + # Python has a high overhead with processes. When there's just + # one worker, it is faster to not create a new process. + # Yet, User still deserves to have a progress print. So, use + # python's "thread", which is actually a single process, using + # an internal schedule to switch between tasks. No performance + # gains for non-IO tasks, but still it can be quickly interrupted + # from time to time to display progress. + executor = futures.ThreadPoolExecutor + + not_found = [] + f_list = [] + with executor(max_workers=max_workers) as exe: + for refs in self.get_fileref(all_refs, chunk_size): + if refs: + try: + f_list.append(exe.submit(self.check_file, refs, found)) + + except KeyboardInterrupt: + return + + total = len(f_list) + + if not total: + if self.abi.re_string: + print(f"No ABI symbol matches {self.abi.search_string}") + else: + self.abi.log.warning("No ABI symbols found") + return + + print(f"{len(f_list):6d} jobs queued on {max_workers} workers", + file=sys.stderr) + + while f_list: + try: + t = futures.wait(f_list, timeout=1, + return_when=futures.FIRST_COMPLETED) + + done = t[0] + + for fut in done: + res_list = fut.result() + + for res in res_list: + if not res["found"]: + not_found.append(res["fname"]) + if res["msg"]: + print(res["msg"]) + + f_list.remove(fut) + except KeyboardInterrupt: + return + + except RuntimeError as e: + self.abi.log.warning(f"Future: {e}") + break + + if sys.stderr.isatty(): + elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] + if len(f_list) < total: + elapsed += f" ({total - len(f_list)}/{total} jobs completed). " + if elapsed != old_elapsed: + print(elapsed + "\r", end="", flush=True, + file=sys.stderr) + old_elapsed = elapsed + + elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] + print(elapsed, file=sys.stderr) + + for f in sorted(not_found): + print(f"{f} not found.") diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py new file mode 100755 index 000000000000..a24f30ef4fa8 --- /dev/null +++ b/tools/lib/python/jobserver.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0+ +# +# pylint: disable=C0103,C0209 +# +# + +""" +Interacts with the POSIX jobserver during the Kernel build time. + +A "normal" jobserver task, like the one initiated by a make subrocess would do: + + - open read/write file descriptors to communicate with the job server; + - ask for one slot by calling: + claim = os.read(reader, 1) + - when the job finshes, call: + os.write(writer, b"+") # os.write(writer, claim) + +Here, the goal is different: This script aims to get the remaining number +of slots available, using all of them to run a command which handle tasks in +parallel. To to that, it has a loop that ends only after there are no +slots left. It then increments the number by one, in order to allow a +call equivalent to make -j$((claim+1)), e.g. having a parent make creating +$claim child to do the actual work. + +The end goal here is to keep the total number of build tasks under the +limit established by the initial make -j$n_proc call. + +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" + +import errno +import os +import subprocess +import sys + +class JobserverExec: + """ + Claim all slots from make using POSIX Jobserver. + + The main methods here are: + - open(): reserves all slots; + - close(): method returns all used slots back to make; + - run(): executes a command setting PARALLELISM= + """ + + def __init__(self): + """Initialize internal vars""" + self.claim = 0 + self.jobs = b"" + self.reader = None + self.writer = None + self.is_open = False + + def open(self): + """Reserve all available slots to be claimed later on""" + + if self.is_open: + return + + try: + # Fetch the make environment options. + flags = os.environ["MAKEFLAGS"] + # Look for "--jobserver=R,W" + # Note that GNU Make has used --jobserver-fds and --jobserver-auth + # so this handles all of them. + opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] + + # Parse out R,W file descriptor numbers and set them nonblocking. + # If the MAKEFLAGS variable contains multiple instances of the + # --jobserver-auth= option, the last one is relevant. + fds = opts[-1].split("=", 1)[1] + + # Starting with GNU Make 4.4, named pipes are used for reader + # and writer. + # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 + _, _, path = fds.partition("fifo:") + + if path: + self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) + self.writer = os.open(path, os.O_WRONLY) + else: + self.reader, self.writer = [int(x) for x in fds.split(",", 1)] + # Open a private copy of reader to avoid setting nonblocking + # on an unexpecting process with the same reader fd. + self.reader = os.open("/proc/self/fd/%d" % (self.reader), + os.O_RDONLY | os.O_NONBLOCK) + + # Read out as many jobserver slots as possible + while True: + try: + slot = os.read(self.reader, 8) + self.jobs += slot + except (OSError, IOError) as e: + if e.errno == errno.EWOULDBLOCK: + # Stop at the end of the jobserver queue. + break + # If something went wrong, give back the jobs. + if self.jobs: + os.write(self.writer, self.jobs) + raise e + + # Add a bump for our caller's reserveration, since we're just going + # to sit here blocked on our child. + self.claim = len(self.jobs) + 1 + + except (KeyError, IndexError, ValueError, OSError, IOError): + # Any missing environment strings or bad fds should result in just + # not being parallel. + self.claim = None + + self.is_open = True + + def close(self): + """Return all reserved slots to Jobserver""" + + if not self.is_open: + return + + # Return all the reserved slots. + if len(self.jobs): + os.write(self.writer, self.jobs) + + self.is_open = False + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.close() + + def run(self, cmd, *args, **pwargs): + """ + Run a command setting PARALLELISM env variable to the number of + available job slots (claim) + 1, e.g. it will reserve claim slots + to do the actual build work, plus one to monitor its children. + """ + self.open() # Ensure that self.claim is set + + # We can only claim parallelism if there was a jobserver (i.e. a + # top-level "-jN" argument) and there were no other failures. Otherwise + # leave out the environment variable and let the child figure out what + # is best. + if self.claim: + os.environ["PARALLELISM"] = str(self.claim) + + return subprocess.call(cmd, *args, **pwargs) diff --git a/tools/lib/python/kdoc/enrich_formatter.py b/tools/lib/python/kdoc/enrich_formatter.py new file mode 100644 index 000000000000..bb171567a4ca --- /dev/null +++ b/tools/lib/python/kdoc/enrich_formatter.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2025 by Mauro Carvalho Chehab . + +""" +Ancillary argparse HelpFormatter class that works on a similar way as +argparse.RawDescriptionHelpFormatter, e.g. description maintains line +breaks, but it also implement transformations to the help text. The +actual transformations ar given by enrich_text(), if the output is tty. + +Currently, the follow transformations are done: + + - Positional arguments are shown in upper cases; + - if output is TTY, ``var`` and positional arguments are shown prepended + by an ANSI SGR code. This is usually translated to bold. On some + terminals, like, konsole, this is translated into a colored bold text. +""" + +import argparse +import re +import sys + +class EnrichFormatter(argparse.HelpFormatter): + """ + Better format the output, making easier to identify the positional args + and how they're used at the __doc__ description. + """ + def __init__(self, *args, **kwargs): + """Initialize class and check if is TTY""" + super().__init__(*args, **kwargs) + self._tty = sys.stdout.isatty() + + def enrich_text(self, text): + """Handle ReST markups (currently, only ``foo``)""" + if self._tty and text: + # Replace ``text`` with ANSI SGR (bold) + return re.sub(r'\`\`(.+?)\`\`', + lambda m: f'\033[1m{m.group(1)}\033[0m', text) + return text + + def _fill_text(self, text, width, indent): + """Enrich descriptions with markups on it""" + enriched = self.enrich_text(text) + return "\n".join(indent + line for line in enriched.splitlines()) + + def _format_usage(self, usage, actions, groups, prefix): + """Enrich positional arguments at usage: line""" + + prog = self._prog + parts = [] + + for action in actions: + if action.option_strings: + opt = action.option_strings[0] + if action.nargs != 0: + opt += f" {action.dest.upper()}" + parts.append(f"[{opt}]") + else: + # Positional argument + parts.append(self.enrich_text(f"``{action.dest.upper()}``")) + + usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n" + return usage_text + + def _format_action_invocation(self, action): + """Enrich argument names""" + if not action.option_strings: + return self.enrich_text(f"``{action.dest.upper()}``") + + return ", ".join(action.option_strings) diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py new file mode 100644 index 000000000000..1fd8d17edb32 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=R0903,R0913,R0914,R0917 + +""" +Parse lernel-doc tags on multiple kernel source files. +""" + +import argparse +import logging +import os +import re + +from kdoc_parser import KernelDoc +from kdoc_output import OutputFormat + + +class GlobSourceFiles: + """ + Parse C source code file names and directories via an Interactor. + """ + + def __init__(self, srctree=None, valid_extensions=None): + """ + Initialize valid extensions with a tuple. + + If not defined, assume default C extensions (.c and .h) + + It would be possible to use python's glob function, but it is + very slow, and it is not interactive. So, it would wait to read all + directories before actually do something. + + So, let's use our own implementation. + """ + + if not valid_extensions: + self.extensions = (".c", ".h") + else: + self.extensions = valid_extensions + + self.srctree = srctree + + def _parse_dir(self, dirname): + """Internal function to parse files recursively""" + + with os.scandir(dirname) as obj: + for entry in obj: + name = os.path.join(dirname, entry.name) + + if entry.is_dir(follow_symlinks=False): + yield from self._parse_dir(name) + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if not basename.endswith(self.extensions): + continue + + yield name + + def parse_files(self, file_list, file_not_found_cb): + """ + Define an interator to parse all source files from file_list, + handling directories if any + """ + + if not file_list: + return + + for fname in file_list: + if self.srctree: + f = os.path.join(self.srctree, fname) + else: + f = fname + + if os.path.isdir(f): + yield from self._parse_dir(f) + elif os.path.isfile(f): + yield f + elif file_not_found_cb: + file_not_found_cb(fname) + + +class KernelFiles(): + """ + Parse kernel-doc tags on multiple kernel source files. + + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and + EXPORT_SYMBOL* macros; + - self.process_export_file(): parses only EXPORT_SYMBOL* macros. + """ + + def warning(self, msg): + """Ancillary routine to output a warning and increment error count""" + + self.config.log.warning(msg) + self.errors += 1 + + def error(self, msg): + """Ancillary routine to output an error and increment error count""" + + self.config.log.error(msg) + self.errors += 1 + + def parse_file(self, fname): + """ + Parse a single Kernel source. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.files: + return + + doc = KernelDoc(self.config, fname) + export_table, entries = doc.parse_kdoc() + + self.export_table[fname] = export_table + + self.files.add(fname) + self.export_files.add(fname) # parse_kdoc() already check exports + + self.results[fname] = entries + + def process_export_file(self, fname): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.export_files: + return + + doc = KernelDoc(self.config, fname) + export_table = doc.parse_export() + + if not export_table: + self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") + export_table = set() + + self.export_table[fname] = export_table + self.export_files.add(fname) + + def file_not_found_cb(self, fname): + """ + Callback to warn if a file was not found. + """ + + self.error(f"Cannot find file {fname}") + + def __init__(self, verbose=False, out_style=None, + werror=False, wreturn=False, wshort_desc=False, + wcontents_before_sections=False, + logger=None): + """ + Initialize startup variables and parse all files + """ + + if not verbose: + verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) + + if out_style is None: + out_style = OutputFormat() + + if not werror: + kcflags = os.environ.get("KCFLAGS", None) + if kcflags: + match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) + if match: + werror = True + + # reading this variable is for backwards compat just in case + # someone was calling it with the variable from outside the + # kernel's build system + kdoc_werror = os.environ.get("KDOC_WERROR", None) + if kdoc_werror: + werror = kdoc_werror + + # Some variables are global to the parser logic as a whole as they are + # used to send control configuration to KernelDoc class. As such, + # those variables are read-only inside the KernelDoc. + self.config = argparse.Namespace + + self.config.verbose = verbose + self.config.werror = werror + self.config.wreturn = wreturn + self.config.wshort_desc = wshort_desc + self.config.wcontents_before_sections = wcontents_before_sections + + if not logger: + self.config.log = logging.getLogger("kernel-doc") + else: + self.config.log = logger + + self.config.warning = self.warning + + self.config.src_tree = os.environ.get("SRCTREE", None) + + # Initialize variables that are internal to KernelFiles + + self.out_style = out_style + + self.errors = 0 + self.results = {} + + self.files = set() + self.export_files = set() + self.export_table = {} + + def parse(self, file_list, export_file=None): + """ + Parse all files + """ + + glob = GlobSourceFiles(srctree=self.config.src_tree) + + for fname in glob.parse_files(file_list, self.file_not_found_cb): + self.parse_file(fname) + + for fname in glob.parse_files(export_file, self.file_not_found_cb): + self.process_export_file(fname) + + def out_msg(self, fname, name, arg): + """ + Return output messages from a file name using the output style + filtering. + + If output type was not handled by the syler, return None. + """ + + # NOTE: we can add rules here to filter out unwanted parts, + # although OutputFormat.msg already does that. + + return self.out_style.msg(fname, name, arg) + + def msg(self, enable_lineno=False, export=False, internal=False, + symbol=None, nosymbol=None, no_doc_sections=False, + filenames=None, export_file=None): + """ + Interacts over the kernel-doc results and output messages, + returning kernel-doc markups on each interaction + """ + + self.out_style.set_config(self.config) + + if not filenames: + filenames = sorted(self.results.keys()) + + glob = GlobSourceFiles(srctree=self.config.src_tree) + + for fname in filenames: + function_table = set() + + if internal or export: + if not export_file: + export_file = [fname] + + for f in glob.parse_files(export_file, self.file_not_found_cb): + function_table |= self.export_table[f] + + if symbol: + for s in symbol: + function_table.add(s) + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + + msg = "" + if fname not in self.results: + self.config.log.warning("No kernel-doc for file %s", fname) + continue + + symbols = self.results[fname] + self.out_style.set_symbols(symbols) + + for arg in symbols: + m = self.out_msg(fname, arg.name, arg) + + if m is None: + ln = arg.get("ln", 0) + dtype = arg.get('type', "") + + self.config.log.warning("%s:%d Can't handle %s", + fname, ln, dtype) + else: + msg += m + + if msg: + yield fname, msg diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py new file mode 100644 index 000000000000..19805301cb2c --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# A class that will, eventually, encapsulate all of the parsed data that we +# then pass into the output modules. +# + +class KdocItem: + def __init__(self, name, fname, type, start_line, **other_stuff): + self.name = name + self.fname = fname + self.type = type + self.declaration_start_line = start_line + self.sections = {} + self.sections_start_lines = {} + self.parameterlist = [] + self.parameterdesc_start_lines = [] + self.parameterdescs = {} + self.parametertypes = {} + # + # Just save everything else into our own dict so that the output + # side can grab it directly as before. As we move things into more + # structured data, this will, hopefully, fade away. + # + self.other_stuff = other_stuff + + def get(self, key, default = None): + return self.other_stuff.get(key, default) + + def __getitem__(self, key): + return self.get(key) + + # + # Tracking of section and parameter information. + # + def set_sections(self, sections, start_lines): + self.sections = sections + self.section_start_lines = start_lines + + def set_params(self, names, descs, types, starts): + self.parameterlist = names + self.parameterdescs = descs + self.parametertypes = types + self.parameterdesc_start_lines = starts diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py new file mode 100644 index 000000000000..58f115059e93 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -0,0 +1,824 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 + +""" +Implement output filters to print kernel-doc documentation. + +The implementation uses a virtual base class (OutputFormat) which +contains a dispatches to virtual methods, and some code to filter +out output messages. + +The actual implementation is done on one separate class per each type +of output. Currently, there are output classes for ReST and man/troff. +""" + +import os +import re +from datetime import datetime + +from kdoc_parser import KernelDoc, type_param +from kdoc_re import KernRe + + +function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) + +# match expressions used to find embedded type information +type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) +type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) +type_func = KernRe(r"(\w+)\(\)", cache=False) +type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# Special RST handling for func ptr params +type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) + +# Special RST handling for structs with func ptr params +type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) + +type_env = KernRe(r"(\$\w+)", cache=False) +type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) +type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) +type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) +type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False) +type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) +type_fallback = KernRe(r"\&([_\w]+)", cache=False) +type_member_func = type_member + KernRe(r"\(\)", cache=False) + + +class OutputFormat: + """ + Base class for OutputFormat. If used as-is, it means that only + warnings will be displayed. + """ + + # output mode. + OUTPUT_ALL = 0 # output all symbols and doc sections + OUTPUT_INCLUDE = 1 # output only specified symbols + OUTPUT_EXPORTED = 2 # output exported symbols + OUTPUT_INTERNAL = 3 # output non-exported symbols + + # Virtual member to be overriden at the inherited classes + highlights = [] + + def __init__(self): + """Declare internal vars and set mode to OUTPUT_ALL""" + + self.out_mode = self.OUTPUT_ALL + self.enable_lineno = None + self.nosymbol = {} + self.symbol = None + self.function_table = None + self.config = None + self.no_doc_sections = False + + self.data = "" + + def set_config(self, config): + """ + Setup global config variables used by both parser and output. + """ + + self.config = config + + def set_filter(self, export, internal, symbol, nosymbol, function_table, + enable_lineno, no_doc_sections): + """ + Initialize filter variables according with the requested mode. + + Only one choice is valid between export, internal and symbol. + + The nosymbol filter can be used on all modes. + """ + + self.enable_lineno = enable_lineno + self.no_doc_sections = no_doc_sections + self.function_table = function_table + + if symbol: + self.out_mode = self.OUTPUT_INCLUDE + elif export: + self.out_mode = self.OUTPUT_EXPORTED + elif internal: + self.out_mode = self.OUTPUT_INTERNAL + else: + self.out_mode = self.OUTPUT_ALL + + if nosymbol: + self.nosymbol = set(nosymbol) + + + def highlight_block(self, block): + """ + Apply the RST highlights to a sub-block of text. + """ + + for r, sub in self.highlights: + block = r.sub(sub, block) + + return block + + def out_warnings(self, args): + """ + Output warnings for identifiers that will be displayed. + """ + + for log_msg in args.warnings: + self.config.warning(log_msg) + + def check_doc(self, name, args): + """Check if DOC should be output""" + + if self.no_doc_sections: + return False + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode == self.OUTPUT_INCLUDE: + if name in self.function_table: + self.out_warnings(args) + return True + + return False + + def check_declaration(self, dtype, name, args): + """ + Checks if a declaration should be output or not based on the + filtering criteria. + """ + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: + if name in self.function_table: + return True + + if self.out_mode == self.OUTPUT_INTERNAL: + if dtype != "function": + self.out_warnings(args) + return True + + if name not in self.function_table: + self.out_warnings(args) + return True + + return False + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser + """ + + self.data = "" + + dtype = args.type + + if dtype == "doc": + self.out_doc(fname, name, args) + return self.data + + if not self.check_declaration(dtype, name, args): + return self.data + + if dtype == "function": + self.out_function(fname, name, args) + return self.data + + if dtype == "enum": + self.out_enum(fname, name, args) + return self.data + + if dtype == "typedef": + self.out_typedef(fname, name, args) + return self.data + + if dtype in ["struct", "union"]: + self.out_struct(fname, name, args) + return self.data + + # Warn if some type requires an output logic + self.config.log.warning("doesn't now how to output '%s' block", + dtype) + + return None + + # Virtual methods to be overridden by inherited classes + # At the base class, those do nothing. + def set_symbols(self, symbols): + """Get a list of all symbols from kernel_doc""" + + def out_doc(self, fname, name, args): + """Outputs a DOC block""" + + def out_function(self, fname, name, args): + """Outputs a function""" + + def out_enum(self, fname, name, args): + """Outputs an enum""" + + def out_typedef(self, fname, name, args): + """Outputs a typedef""" + + def out_struct(self, fname, name, args): + """Outputs a struct""" + + +class RestFormat(OutputFormat): + """Consts and functions used by ReST output""" + + highlights = [ + (type_constant, r"``\1``"), + (type_constant2, r"``\1``"), + + # Note: need to escape () to avoid func matching later + (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), + (type_member, r":c:type:`\1\2\3 <\1>`"), + (type_fp_param, r"**\1\\(\\)**"), + (type_fp_param2, r"**\1\\(\\)**"), + (type_func, r"\1()"), + (type_enum, r":c:type:`\1 <\2>`"), + (type_struct, r":c:type:`\1 <\2>`"), + (type_typedef, r":c:type:`\1 <\2>`"), + (type_union, r":c:type:`\1 <\2>`"), + + # in rst this can refer to any type + (type_fallback, r":c:type:`\1`"), + (type_param_ref, r"**\1\2**") + ] + blankline = "\n" + + sphinx_literal = KernRe(r'^[^.].*::$', cache=False) + sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) + + def __init__(self): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.lineprefix = "" + + def print_lineno(self, ln): + """Outputs a line number""" + + if self.enable_lineno and ln is not None: + ln += 1 + self.data += f".. LINENO {ln}\n" + + def output_highlight(self, args): + """ + Outputs a C symbol that may require being converted to ReST using + the self.highlights variable + """ + + input_text = args + output = "" + in_literal = False + litprefix = "" + block = "" + + for line in input_text.strip("\n").split("\n"): + + # If we're in a literal block, see if we should drop out of it. + # Otherwise, pass the line straight through unmunged. + if in_literal: + if line.strip(): # If the line is not blank + # If this is the first non-blank line in a literal block, + # figure out the proper indent. + if not litprefix: + r = KernRe(r'^(\s*)') + if r.match(line): + litprefix = '^' + r.group(1) + else: + litprefix = "" + + output += line + "\n" + elif not KernRe(litprefix).match(line): + in_literal = False + else: + output += line + "\n" + else: + output += line + "\n" + + # Not in a literal block (or just dropped out) + if not in_literal: + block += line + "\n" + if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line): + in_literal = True + litprefix = "" + output += self.highlight_block(block) + block = "" + + # Handle any remaining block + if block: + output += self.highlight_block(block) + + # Print the output with the line prefix + for line in output.strip("\n").split("\n"): + self.data += self.lineprefix + line + "\n" + + def out_section(self, args, out_docblock=False): + """ + Outputs a block section. + + This could use some work; it's used to output the DOC: sections, and + starts by putting out the name of the doc section itself, but that + tends to duplicate a header already in the template file. + """ + for section, text in args.sections.items(): + # Skip sections that are in the nosymbol_table + if section in self.nosymbol: + continue + + if out_docblock: + if not self.out_mode == self.OUTPUT_INCLUDE: + self.data += f".. _{section}:\n\n" + self.data += f'{self.lineprefix}**{section}**\n\n' + else: + self.data += f'{self.lineprefix}**{section}**\n\n' + + self.print_lineno(args.section_start_lines.get(section, 0)) + self.output_highlight(text) + self.data += "\n" + self.data += "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + self.out_section(args, out_docblock=True) + + def out_function(self, fname, name, args): + + oldprefix = self.lineprefix + signature = "" + + func_macro = args.get('func_macro', False) + if func_macro: + signature = name + else: + if args.get('functiontype'): + signature = args['functiontype'] + " " + signature += name + " (" + + ln = args.declaration_start_line + count = 0 + for parameter in args.parameterlist: + if count != 0: + signature += ", " + count += 1 + dtype = args.parametertypes.get(parameter, "") + + if function_pointer.search(dtype): + signature += function_pointer.group(1) + parameter + function_pointer.group(3) + else: + signature += dtype + + if not func_macro: + signature += ")" + + self.print_lineno(ln) + if args.get('typedef') or not args.get('functiontype'): + self.data += f".. c:macro:: {name}\n\n" + + if args.get('typedef'): + self.data += " **Typedef**: " + self.lineprefix = "" + self.output_highlight(args.get('purpose', "")) + self.data += "\n\n**Syntax**\n\n" + self.data += f" ``{signature}``\n\n" + else: + self.data += f"``{signature}``\n\n" + else: + self.data += f".. c:function:: {signature}\n\n" + + if not args.get('typedef'): + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', "")) + self.data += "\n" + + # Put descriptive text into a container (HTML
) to help set + # function prototypes apart + self.lineprefix = " " + + if args.parameterlist: + self.data += ".. container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Parameters**\n\n" + + for parameter in args.parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + dtype = args.parametertypes.get(parameter, "") + + if dtype: + self.data += f"{self.lineprefix}``{dtype}``\n" + else: + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) + + self.lineprefix = " " + if parameter_name in args.parameterdescs and \ + args.parameterdescs[parameter_name] != KernelDoc.undescribed: + + self.output_highlight(args.parameterdescs[parameter_name]) + self.data += "\n" + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.lineprefix = " " + + self.out_section(args) + self.lineprefix = oldprefix + + def out_enum(self, fname, name, args): + + oldprefix = self.lineprefix + ln = args.declaration_start_line + + self.data += f"\n\n.. c:enum:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', '')) + self.data += "\n" + + self.data += ".. container:: kernelindent\n\n" + outer = self.lineprefix + " " + self.lineprefix = outer + " " + self.data += f"{outer}**Constants**\n\n" + + for parameter in args.parameterlist: + self.data += f"{outer}``{parameter}``\n" + + if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed: + self.output_highlight(args.parameterdescs[parameter]) + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_typedef(self, fname, name, args): + + oldprefix = self.lineprefix + ln = args.declaration_start_line + + self.data += f"\n\n.. c:type:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + + self.output_highlight(args.get('purpose', '')) + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_struct(self, fname, name, args): + + purpose = args.get('purpose', "") + declaration = args.get('definition', "") + dtype = args.type + ln = args.declaration_start_line + + self.data += f"\n\n.. c:{dtype}:: {name}\n\n" + + self.print_lineno(ln) + + oldprefix = self.lineprefix + self.lineprefix += " " + + self.output_highlight(purpose) + self.data += "\n" + + self.data += ".. container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Definition**::\n\n" + + self.lineprefix = self.lineprefix + " " + + declaration = declaration.replace("\t", self.lineprefix) + + self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n" + self.data += f"{declaration}{self.lineprefix}" + "};\n\n" + + self.lineprefix = " " + self.data += f"{self.lineprefix}**Members**\n\n" + for parameter in args.parameterlist: + if not parameter or parameter.startswith("#"): + continue + + parameter_name = parameter.split("[", maxsplit=1)[0] + + if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) + + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.lineprefix = " " + self.output_highlight(args.parameterdescs[parameter_name]) + self.lineprefix = " " + + self.data += "\n" + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + +class ManFormat(OutputFormat): + """Consts and functions used by man pages output""" + + highlights = ( + (type_constant, r"\1"), + (type_constant2, r"\1"), + (type_func, r"\\fB\1\\fP"), + (type_enum, r"\\fI\1\\fP"), + (type_struct, r"\\fI\1\\fP"), + (type_typedef, r"\\fI\1\\fP"), + (type_union, r"\\fI\1\\fP"), + (type_param, r"\\fI\1\\fP"), + (type_param_ref, r"\\fI\1\2\\fP"), + (type_member, r"\\fI\1\2\3\\fP"), + (type_fallback, r"\\fI\1\\fP") + ) + blankline = "" + + date_formats = [ + "%a %b %d %H:%M:%S %Z %Y", + "%a %b %d %H:%M:%S %Y", + "%Y-%m-%d", + "%b %d %Y", + "%B %d %Y", + "%m %d %Y", + ] + + def __init__(self, modulename): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.modulename = modulename + self.symbols = [] + + dt = None + tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") + if tstamp: + for fmt in self.date_formats: + try: + dt = datetime.strptime(tstamp, fmt) + break + except ValueError: + pass + + if not dt: + dt = datetime.now() + + self.man_date = dt.strftime("%B %Y") + + def arg_name(self, args, name): + """ + Return the name that will be used for the man page. + + As we may have the same name on different namespaces, + prepend the data type for all types except functions and typedefs. + + The doc section is special: it uses the modulename. + """ + + dtype = args.type + + if dtype == "doc": + return self.modulename + + if dtype in ["function", "typedef"]: + return name + + return f"{dtype} {name}" + + def set_symbols(self, symbols): + """ + Get a list of all symbols from kernel_doc. + + Man pages will uses it to add a SEE ALSO section with other + symbols at the same file. + """ + self.symbols = symbols + + def out_tail(self, fname, name, args): + """Adds a tail for all man pages""" + + # SEE ALSO section + self.data += f'.SH "SEE ALSO"' + "\n.PP\n" + self.data += (f"Kernel file \\fB{args.fname}\\fR\n") + if len(self.symbols) >= 2: + cur_name = self.arg_name(args, name) + + related = [] + for arg in self.symbols: + out_name = self.arg_name(arg, arg.name) + + if cur_name == out_name: + continue + + related.append(f"\\fB{out_name}\\fR(9)") + + self.data += ",\n".join(related) + "\n" + + # TODO: does it make sense to add other sections? Maybe + # REPORTING ISSUES? LICENSE? + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser. + + Add a tail at the end of man pages output. + """ + super().msg(fname, name, args) + self.out_tail(fname, name, args) + + return self.data + + def output_highlight(self, block): + """ + Outputs a C symbol that may require being highlighted with + self.highlights variable using troff syntax + """ + + contents = self.highlight_block(block) + + if isinstance(contents, list): + contents = "\n".join(contents) + + for line in contents.strip("\n").split("\n"): + line = KernRe(r"^\s*").sub("", line) + if not line: + continue + + if line[0] == ".": + self.data += "\\&" + line + "\n" + else: + self.data += line + "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_function(self, fname, name, args): + """output function in man""" + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + if args.get('functiontype', ''): + self.data += f'.B "{args["functiontype"]}" {name}' + "\n" + else: + self.data += f'.B "{name}' + "\n" + + count = 0 + parenth = "(" + post = "," + + for parameter in args.parameterlist: + if count == len(args.parameterlist) - 1: + post = ");" + + dtype = args.parametertypes.get(parameter, "") + if function_pointer.match(dtype): + # Pointer-to-function + self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" + else: + dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) + + self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" + count += 1 + parenth = "" + + if args.parameterlist: + self.data += ".SH ARGUMENTS\n" + + for parameter in args.parameterlist: + parameter_name = re.sub(r'\[.*', '', parameter) + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section.upper()}"' + "\n" + self.output_highlight(text) + + def out_enum(self, fname, name, args): + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"enum {name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + self.data += f"enum {name}" + " {\n" + + count = 0 + for parameter in args.parameterlist: + self.data += f'.br\n.BI " {parameter}"' + "\n" + if count == len(args.parameterlist) - 1: + self.data += "\n};\n" + else: + self.data += ", \n.br\n" + + count += 1 + + self.data += ".SH Constants\n" + + for parameter in args.parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_typedef(self, fname, name, args): + module = self.modulename + purpose = args.get('purpose') + out_name = self.arg_name(args, name) + + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"typedef {name} \\- {purpose}\n" + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_struct(self, fname, name, args): + module = self.modulename + purpose = args.get('purpose') + definition = args.get('definition') + out_name = self.arg_name(args, name) + + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{args.type} {name} \\- {purpose}\n" + + # Replace tabs with two spaces and handle newlines + declaration = definition.replace("\t", " ") + declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) + + self.data += ".SH SYNOPSIS\n" + self.data += f"{args.type} {name} " + "{" + "\n.br\n" + self.data += f'.BI "{declaration}\n' + "};\n.br\n\n" + + self.data += ".SH Members\n" + for parameter in args.parameterlist: + if parameter.startswith("#"): + continue + + parameter_name = re.sub(r"\[.*", "", parameter) + + if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name)) + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py new file mode 100644 index 000000000000..f7dbb0868367 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -0,0 +1,1667 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 + +""" +kdoc_parser +=========== + +Read a C language source or header FILE and extract embedded +documentation comments +""" + +import sys +import re +from pprint import pformat + +from kdoc_re import NestedMatch, KernRe +from kdoc_item import KdocItem + +# +# Regular expressions used to parse kernel-doc markups at KernelDoc class. +# +# Let's declare them in lowercase outside any class to make easier to +# convert from the python script. +# +# As those are evaluated at the beginning, no need to cache them +# + +# Allow whitespace at end of comment start. +doc_start = KernRe(r'^/\*\*\s*$', cache=False) + +doc_end = KernRe(r'\*/', cache=False) +doc_com = KernRe(r'\s*\*\s*', cache=False) +doc_com_body = KernRe(r'\s*\* ?', cache=False) +doc_decl = doc_com + KernRe(r'(\w+)', cache=False) + +# @params and a strictly limited set of supported section names +# Specifically: +# Match @word: +# @...: +# @{section-name}: +# while trying to not match literal block starts like "example::" +# +known_section_names = 'description|context|returns?|notes?|examples?' +known_sections = KernRe(known_section_names, flags = re.I) +doc_sect = doc_com + \ + KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', + flags=re.I, cache=False) + +doc_content = doc_com_body + KernRe(r'(.*)', cache=False) +doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) + +export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) + +type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# +# Tests for the beginning of a kerneldoc block in its various forms. +# +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) +doc_begin_func = KernRe(str(doc_com) + # initial " * ' + r"(?:\w+\s*\*\s*)?" + # type (not captured) + r'(?:define\s+)?' + # possible "define" (not captured) + r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" + r'(?:[-:].*)?$', # description (not captured) + cache = False) + +# +# Here begins a long set of transformations to turn structure member prefixes +# and macro invocations into something we can parse and generate kdoc for. +# +struct_args_pattern = r'([^,)]+)' + +struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end limiter matching +# the start delimiter. Yet, right now, only one replace group +# is allowed. +# +struct_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + +# +# Transforms for function prototypes +# +function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), +] + +# +# Apply a set of transforms to a block of text. +# +def apply_transforms(xforms, text): + for search, subst in xforms: + text = search.sub(subst, text) + return text + +# +# A little helper to get rid of excess white space +# +multi_space = KernRe(r'\s\s+') +def trim_whitespace(s): + return multi_space.sub(' ', s.strip()) + +# +# Remove struct/enum members that have been marked "private". +# +def trim_private_members(text): + # + # First look for a "public:" block that ends a private region, then + # handle the "private until the end" case. + # + text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) + text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) + # + # We needed the comments to do the above, but now we can take them out. + # + return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + +class state: + """ + State machine enums + """ + + # Parser states + NORMAL = 0 # normal code + NAME = 1 # looking for function name + DECLARATION = 2 # We have seen a declaration which might not be done + BODY = 3 # the body of the comment + SPECIAL_SECTION = 4 # doc section ending with a blank line + PROTO = 5 # scanning prototype + DOCBLOCK = 6 # documentation block + INLINE_NAME = 7 # gathering doc outside main block + INLINE_TEXT = 8 # reading the body of inline docs + + name = [ + "NORMAL", + "NAME", + "DECLARATION", + "BODY", + "SPECIAL_SECTION", + "PROTO", + "DOCBLOCK", + "INLINE_NAME", + "INLINE_TEXT", + ] + + +SECTION_DEFAULT = "Description" # default section + +class KernelEntry: + + def __init__(self, config, fname, ln): + self.config = config + self.fname = fname + + self._contents = [] + self.prototype = "" + + self.warnings = [] + + self.parameterlist = [] + self.parameterdescs = {} + self.parametertypes = {} + self.parameterdesc_start_lines = {} + + self.section_start_lines = {} + self.sections = {} + + self.anon_struct_union = False + + self.leading_space = None + + self.fname = fname + + # State flags + self.brcount = 0 + self.declaration_start_line = ln + 1 + + # + # Management of section contents + # + def add_text(self, text): + self._contents.append(text) + + def contents(self): + return '\n'.join(self._contents) + '\n' + + # TODO: rename to emit_message after removal of kernel-doc.pl + def emit_msg(self, ln, msg, *, warning=True): + """Emit a message""" + + log_msg = f"{self.fname}:{ln} {msg}" + + if not warning: + self.config.log.info(log_msg) + return + + # Delegate warning output to output logic, as this way it + # will report warnings/info only for symbols that are output + + self.warnings.append(log_msg) + return + + # + # Begin a new section. + # + def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): + if dump: + self.dump_section(start_new = True) + self.section = title + self.new_start_line = line_no + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + # + # If we have accumulated no contents in the default ("description") + # section, don't bother. + # + if self.section == SECTION_DEFAULT and not self._contents: + return + name = self.section + contents = self.contents() + + if type_param.match(name): + name = type_param.group(1) + + self.parameterdescs[name] = contents + self.parameterdesc_start_lines[name] = self.new_start_line + + self.new_start_line = 0 + + else: + if name in self.sections and self.sections[name] != "": + # Only warn on user-specified duplicate section names + if name != SECTION_DEFAULT: + self.emit_msg(self.new_start_line, + f"duplicate section name '{name}'") + # Treat as a new paragraph - add a blank line + self.sections[name] += '\n' + contents + else: + self.sections[name] = contents + self.section_start_lines[name] = self.new_start_line + self.new_start_line = 0 + +# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) + + if start_new: + self.section = SECTION_DEFAULT + self._contents = [] + +python_warning = False + +class KernelDoc: + """ + Read a C language source or header FILE and extract embedded + documentation comments. + """ + + # Section names + + section_context = "Context" + section_return = "Return" + + undescribed = "-- undescribed --" + + def __init__(self, config, fname): + """Initialize internal variables""" + + self.fname = fname + self.config = config + + # Initial state for the state machines + self.state = state.NORMAL + + # Store entry currently being processed + self.entry = None + + # Place all potential outputs into an array + self.entries = [] + + # + # We need Python 3.7 for its "dicts remember the insertion + # order" guarantee + # + global python_warning + if (not python_warning and + sys.version_info.major == 3 and sys.version_info.minor < 7): + + self.emit_msg(0, + 'Python 3.7 or later is required for correct results') + python_warning = True + + def emit_msg(self, ln, msg, *, warning=True): + """Emit a message""" + + if self.entry: + self.entry.emit_msg(ln, msg, warning=warning) + return + + log_msg = f"{self.fname}:{ln} {msg}" + + if warning: + self.config.log.warning(log_msg) + else: + self.config.log.info(log_msg) + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + + if self.entry: + self.entry.dump_section(start_new) + + # TODO: rename it to store_declaration after removal of kernel-doc.pl + def output_declaration(self, dtype, name, **args): + """ + Stores the entry into an entry array. + + The actual output and output filters will be handled elsewhere + """ + + item = KdocItem(name, self.fname, dtype, + self.entry.declaration_start_line, **args) + item.warnings = self.entry.warnings + + # Drop empty sections + # TODO: improve empty sections logic to emit warnings + sections = self.entry.sections + for section in ["Description", "Return"]: + if section in sections and not sections[section].rstrip(): + del sections[section] + item.set_sections(sections, self.entry.section_start_lines) + item.set_params(self.entry.parameterlist, self.entry.parameterdescs, + self.entry.parametertypes, + self.entry.parameterdesc_start_lines) + self.entries.append(item) + + self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) + + def reset_state(self, ln): + """ + Ancillary routine to create a new entry. It initializes all + variables used by the state machine. + """ + + # + # Flush the warnings out before we proceed further + # + if self.entry and self.entry not in self.entries: + for log_msg in self.entry.warnings: + self.config.log.warning(log_msg) + + self.entry = KernelEntry(self.config, self.fname, ln) + + # State flags + self.state = state.NORMAL + + def push_parameter(self, ln, decl_type, param, dtype, + org_arg, declaration_name): + """ + Store parameters and their descriptions at self.entry. + """ + + if self.entry.anon_struct_union and dtype == "" and param == "}": + return # Ignore the ending }; from anonymous struct/union + + self.entry.anon_struct_union = False + + param = KernRe(r'[\[\)].*').sub('', param, count=1) + + # + # Look at various "anonymous type" cases. + # + if dtype == '': + if param.endswith("..."): + if len(param) > 3: # there is a name provided, use that + param = param[:-3] + if not self.entry.parameterdescs.get(param): + self.entry.parameterdescs[param] = "variable arguments" + + elif (not param) or param == "void": + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True + + # Warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements) + if param not in self.entry.parameterdescs and not param.startswith("#"): + self.entry.parameterdescs[param] = self.undescribed + + if "." not in param: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_msg(ln, + f"{dname} '{param}' not described in '{declaration_name}'") + + # Strip spaces from param so that it is one continuous string on + # parameterlist. This fixes a problem where check_sections() + # cannot find a parameter like "addr[6 + 2]" because it actually + # appears as "addr[6", "+", "2]" on the parameter list. + # However, it's better to maintain the param string unchanged for + # output, so just weaken the string compare in check_sections() + # to ignore "[blah" in a parameter string. + + self.entry.parameterlist.append(param) + org_arg = KernRe(r'\s\s+').sub(' ', org_arg) + self.entry.parametertypes[param] = org_arg + + + def create_parameter_list(self, ln, decl_type, args, + splitter, declaration_name): + """ + Creates a list of parameters, storing them at self.entry. + """ + + # temporarily replace all commas inside function pointer definition + arg_expr = KernRe(r'(\([^\),]+),') + while arg_expr.search(args): + args = arg_expr.sub(r"\1#", args) + + for arg in args.split(splitter): + # Ignore argument attributes + arg = KernRe(r'\sPOS0?\s').sub(' ', arg) + + # Strip leading/trailing spaces + arg = arg.strip() + arg = KernRe(r'\s+').sub(' ', arg, count=1) + + if arg.startswith('#'): + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + + # Treat preprocessor directive as a typeless variable + self.push_parameter(ln, decl_type, arg, "", + "", declaration_name) + # + # The pointer-to-function case. + # + elif KernRe(r'\(.+\)\s*\(').search(arg): + arg = arg.replace('#', ',') + r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" + r'([\w\[\].]*)' # Capture the name and possible [array] + r'\s*\)') # Make sure the trailing ")" is there + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + # + # The array-of-pointers case. Dig the parameter name out from the middle + # of the declaration. + # + elif KernRe(r'\(.+\)\s*\[').search(arg): + r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" + r'([\w.]*?)' # The actual pointer name + r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + elif arg: + # + # Clean up extraneous spaces and split the string at commas; the first + # element of the resulting list will also include the type information. + # + arg = KernRe(r'\s*:\s*').sub(":", arg) + arg = KernRe(r'\s*\[').sub('[', arg) + args = KernRe(r'\s*,\s*').split(arg) + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + # + # args[0] has a string of "type a". If "a" includes an [array] + # declaration, we want to not be fooled by any white space inside + # the brackets, so detect and handle that case specially. + # + r = KernRe(r'^([^[\]]*\s+)(.*)$') + if r.match(args[0]): + args[0] = r.group(2) + dtype = r.group(1) + else: + # No space in args[0]; this seems wrong but preserves previous behavior + dtype = '' + + bitfield_re = KernRe(r'(.*?):(\w+)') + for param in args: + # + # For pointers, shift the star(s) from the variable name to the + # type declaration. + # + r = KernRe(r'^(\*+)\s*(.*)') + if r.match(param): + self.push_parameter(ln, decl_type, r.group(2), + f"{dtype} {r.group(1)}", + arg, declaration_name) + # + # Perform a similar shift for bitfields. + # + elif bitfield_re.search(param): + if dtype != "": # Skip unnamed bit-fields + self.push_parameter(ln, decl_type, bitfield_re.group(1), + f"{dtype}:{bitfield_re.group(2)}", + arg, declaration_name) + else: + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + def check_sections(self, ln, decl_name, decl_type): + """ + Check for errors inside sections, emitting warnings if not found + parameters are described. + """ + for section in self.entry.sections: + if section not in self.entry.parameterlist and \ + not known_sections.search(section): + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + self.emit_msg(ln, + f"Excess {dname} '{section}' description in '{decl_name}'") + + def check_return_section(self, ln, declaration_name, return_type): + """ + If the function doesn't return void, warns about the lack of a + return description. + """ + + if not self.config.wreturn: + return + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type (but not "void *") + if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): + return + + if not self.entry.sections.get("Return", None): + self.emit_msg(ln, + f"No description found for return value of '{declaration_name}'") + + # + # Split apart a structure prototype; returns (struct|union, name, members) or None + # + def split_struct_proto(self, proto): + type_pattern = r'(struct|union)' + qualifiers = [ + "__attribute__", + "__packed", + "__aligned", + "____cacheline_aligned_in_smp", + "____cacheline_aligned", + ] + definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" + + r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) + if r.search(proto): + return (r.group(1), r.group(2), r.group(3)) + else: + r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + if r.search(proto): + return (r.group(1), r.group(3), r.group(2)) + return None + # + # Rewrite the members of a structure or union for easier formatting later on. + # Among other things, this function will turn a member like: + # + # struct { inner_members; } foo; + # + # into: + # + # struct foo; inner_members; + # + def rewrite_struct_members(self, members): + # + # Process struct/union members from the most deeply nested outward. The + # trick is in the ^{ below - it prevents a match of an outer struct/union + # until the inner one has been munged (removing the "{" in the process). + # + struct_members = KernRe(r'(struct|union)' # 0: declaration type + r'([^\{\};]+)' # 1: possible name + r'(\{)' + r'([^\{\}]*)' # 3: Contents of declaration + r'(\})' + r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration + tuples = struct_members.findall(members) + while tuples: + for t in tuples: + newmember = "" + oldmember = "".join(t) # Reconstruct the original formatting + dtype, name, lbr, content, rbr, rest, semi = t + # + # Pass through each field name, normalizing the form and formatting. + # + for s_id in rest.split(','): + s_id = s_id.strip() + newmember += f"{dtype} {s_id}; " + # + # Remove bitfield/array/pointer info, getting the bare name. + # + s_id = KernRe(r'[:\[].*').sub('', s_id) + s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + # + # Pass through the members of this inner structure/union. + # + for arg in content.split(';'): + arg = arg.strip() + # + # Look for (type)(*name)(args) - pointer to function + # + r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') + if r.match(arg): + dtype, name, extra = r.group(1), r.group(2), r.group(3) + # Pointer-to-function + if not s_id: + # Anonymous struct/union + newmember += f"{dtype}{name}{extra}; " + else: + newmember += f"{dtype}{s_id}.{name}{extra}; " + # + # Otherwise a non-function member. + # + else: + # + # Remove bitmap and array portions and spaces around commas + # + arg = KernRe(r':\s*\d+\s*').sub('', arg) + arg = KernRe(r'\[.*\]').sub('', arg) + arg = KernRe(r'\s*,\s*').sub(',', arg) + # + # Look for a normal decl - "type name[,name...]" + # + r = KernRe(r'(.*)\s+([\S+,]+)') + if r.search(arg): + for name in r.group(2).split(','): + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) + if not s_id: + # Anonymous struct/union + newmember += f"{r.group(1)} {name}; " + else: + newmember += f"{r.group(1)} {s_id}.{name}; " + else: + newmember += f"{arg}; " + # + # At the end of the s_id loop, replace the original declaration with + # the munged version. + # + members = members.replace(oldmember, newmember) + # + # End of the tuple loop - search again and see if there are outer members + # that now turn up. + # + tuples = struct_members.findall(members) + return members + + # + # Format the struct declaration into a standard form for inclusion in the + # resulting docs. + # + def format_struct_decl(self, declaration): + # + # Insert newlines, get rid of extra spaces. + # + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) + # + # Format inline enums with each member on its own line. + # + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') + while r.search(declaration): + declaration = r.sub(r'\1,\n\2', declaration) + # + # Now go through and supply the right number of tabs + # for each line. + # + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) + if clause: + if '}' in clause and level > 1: + level -= 1 + if not clause.startswith('#'): + declaration += "\t" * level + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + return declaration + + + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: + self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") + return + decl_type, declaration_name, members = struct_parts + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") + return + # + # Go through the list of members applying all of our transformations. + # + members = trim_private_members(members) + members = apply_transforms(struct_xforms, members) + + nested = NestedMatch() + for search, sub in struct_nested_prefixes: + members = nested.sub(search, sub, members) + # + # Deal with embedded struct and union members, and drop enums entirely. + # + declaration = members + members = self.rewrite_struct_members(members) + members = re.sub(r'(\{[^\{\}]*\})', '', members) + # + # Output the result and we are done. + # + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type) + self.output_declaration(decl_type, declaration_name, + definition=self.format_struct_decl(declaration), + purpose=self.entry.declaration_purpose) + + def dump_enum(self, ln, proto): + """ + Stores an enum inside self.entries array. + """ + # + # Strip preprocessor directives. Note that this depends on the + # trailing semicolon we added in process_proto_type(). + # + proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + # + # Parse out the name and members of the enum. Typedef form first. + # + r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + if r.search(proto): + declaration_name = r.group(2) + members = trim_private_members(r.group(1)) + # + # Failing that, look for a straight enum + # + else: + r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') + if r.match(proto): + declaration_name = r.group(1) + members = trim_private_members(r.group(2)) + # + # OK, this isn't going to work. + # + else: + self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") + return + # + # Make sure we found what we were expecting. + # + if self.entry.identifier != declaration_name: + if self.entry.identifier == "": + self.emit_msg(ln, + f"{proto}: wrong kernel-doc identifier on prototype") + else: + self.emit_msg(ln, + f"expecting prototype for enum {self.entry.identifier}. " + f"Prototype was for enum {declaration_name} instead") + return + + if not declaration_name: + declaration_name = "(anonymous)" + # + # Parse out the name of each enum member, and verify that we + # have a description for it. + # + member_set = set() + members = KernRe(r'\([^;)]*\)').sub('', members) + for arg in members.split(','): + if not arg: + continue + arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + self.entry.parameterlist.append(arg) + if arg not in self.entry.parameterdescs: + self.entry.parameterdescs[arg] = self.undescribed + self.emit_msg(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") + member_set.add(arg) + # + # Ensure that every described member actually exists in the enum. + # + for k in self.entry.parameterdescs: + if k not in member_set: + self.emit_msg(ln, + f"Excess enum value '%{k}' description in '{declaration_name}'") + + self.output_declaration('enum', declaration_name, + purpose=self.entry.declaration_purpose) + + def dump_declaration(self, ln, prototype): + """ + Stores a data declaration inside self.entries array. + """ + + if self.entry.decl_type == "enum": + self.dump_enum(ln, prototype) + elif self.entry.decl_type == "typedef": + self.dump_typedef(ln, prototype) + elif self.entry.decl_type in ["union", "struct"]: + self.dump_struct(ln, prototype) + else: + # This would be a bug + self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') + + def dump_function(self, ln, prototype): + """ + Stores a function of function macro inside self.entries array. + """ + + found = func_macro = False + return_type = '' + decl_type = 'function' + # + # Apply the initial transformations. + # + prototype = apply_transforms(function_xforms, prototype) + # + # If we have a macro, remove the "#define" at the front. + # + new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) + if new_proto != prototype: + prototype = new_proto + # + # Dispense with the simple "#define A B" case here; the key + # is the space after the name of the symbol being defined. + # NOTE that the seemingly misnamed "func_macro" indicates a + # macro *without* arguments. + # + r = KernRe(r'^(\w+)\s+') + if r.search(prototype): + return_type = '' + declaration_name = r.group(1) + func_macro = True + found = True + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. + # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + name = r'\w+' + type1 = r'(?:[\w\s]+)?' + type2 = r'(?:[\w\s]+\*+)+' + # + # Attempt to match first on (args) with no internal parentheses; this + # lets us easily filter out __acquires() and other post-args stuff. If + # that fails, just grab the rest of the line to the last closing + # parenthesis. + # + proto_args = r'\(([^\(]*|.*)\)' + # + # (Except for the simple macro case) attempt to split up the prototype + # in the various ways we understand. + # + if not found: + patterns = [ + rf'^()({name})\s*{proto_args}', + rf'^({type1})\s+({name})\s*{proto_args}', + rf'^({type2})\s*({name})\s*{proto_args}', + ] + + for p in patterns: + r = KernRe(p) + if r.match(prototype): + return_type = r.group(1) + declaration_name = r.group(2) + args = r.group(3) + self.create_parameter_list(ln, decl_type, args, ',', + declaration_name) + found = True + break + # + # Parsing done; make sure that things are as we expect. + # + if not found: + self.emit_msg(ln, + f"cannot understand function prototype: '{prototype}'") + return + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " + f"Prototype was for {declaration_name}() instead") + return + self.check_sections(ln, declaration_name, "function") + self.check_return_section(ln, declaration_name, return_type) + # + # Store the result. + # + self.output_declaration(decl_type, declaration_name, + typedef=('typedef' in return_type), + functiontype=return_type, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + + + def dump_typedef(self, ln, proto): + """ + Stores a typedef inside self.entries array. + """ + # + # We start by looking for function typedefs. + # + typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + typedef_ident = r'\*?\s*(\w\S+)\s*' + typedef_args = r'\s*\((.*)\);' + + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) + + # Parse function typedef prototypes + for r in [typedef1, typedef2]: + if not r.match(proto): + continue + + return_type = r.group(1).strip() + declaration_name = r.group(2) + args = r.group(3) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.create_parameter_list(ln, 'function', args, ',', declaration_name) + + self.output_declaration('function', declaration_name, + typedef=True, + functiontype=return_type, + purpose=self.entry.declaration_purpose) + return + # + # Not a function, try to parse a simple typedef. + # + r = KernRe(r'typedef.*\s+(\w+)\s*;') + if r.match(proto): + declaration_name = r.group(1) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.output_declaration('typedef', declaration_name, + purpose=self.entry.declaration_purpose) + return + + self.emit_msg(ln, "error: Cannot parse typedef!") + + @staticmethod + def process_export(function_set, line): + """ + process EXPORT_SYMBOL* tags + + This method doesn't use any variable from the class, so declare it + with a staticmethod decorator. + """ + + # We support documenting some exported symbols with different + # names. A horrible hack. + suffixes = [ '_noprof' ] + + # Note: it accepts only one EXPORT_SYMBOL* per line, as having + # multiple export lines would violate Kernel coding style. + + if export_symbol.search(line): + symbol = export_symbol.group(2) + elif export_symbol_ns.search(line): + symbol = export_symbol_ns.group(2) + else: + return False + # + # Found an export, trim out any special suffixes + # + for suffix in suffixes: + # Be backward compatible with Python < 3.9 + if symbol.endswith(suffix): + symbol = symbol[:-len(suffix)] + function_set.add(symbol) + return True + + def process_normal(self, ln, line): + """ + STATE_NORMAL: looking for the /** to begin everything. + """ + + if not doc_start.match(line): + return + + # start a new entry + self.reset_state(ln) + + # next line is always the function name + self.state = state.NAME + + def process_name(self, ln, line): + """ + STATE_NAME: Looking for the "name - description" line + """ + # + # Check for a DOC: block and handle them specially. + # + if doc_block.search(line): + + if not doc_block.group(1): + self.entry.begin_section(ln, "Introduction") + else: + self.entry.begin_section(ln, doc_block.group(1)) + + self.entry.identifier = self.entry.section + self.state = state.DOCBLOCK + # + # Otherwise we're looking for a normal kerneldoc declaration line. + # + elif doc_decl.search(line): + self.entry.identifier = doc_decl.group(1) + + # Test for data declaration + if doc_begin_data.search(line): + self.entry.decl_type = doc_begin_data.group(1) + self.entry.identifier = doc_begin_data.group(2) + # + # Look for a function description + # + elif doc_begin_func.search(line): + self.entry.identifier = doc_begin_func.group(1) + self.entry.decl_type = "function" + # + # We struck out. + # + else: + self.emit_msg(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = state.NORMAL + return + # + # OK, set up for a new kerneldoc entry. + # + self.state = state.BODY + self.entry.identifier = self.entry.identifier.strip(" ") + # if there's no @param blocks need to set up default section here + self.entry.begin_section(ln + 1) + # + # Find the description portion, which *should* be there but + # isn't always. + # (We should be able to capture this from the previous parsing - someday) + # + r = KernRe("[-:](.*)") + if r.search(line): + self.entry.declaration_purpose = trim_whitespace(r.group(1)) + self.state = state.DECLARATION + else: + self.entry.declaration_purpose = "" + + if not self.entry.declaration_purpose and self.config.wshort_desc: + self.emit_msg(ln, + f"missing initial short description on line:\n{line}") + + if not self.entry.identifier and self.entry.decl_type != "enum": + self.emit_msg(ln, + f"wrong kernel-doc identifier on line:\n{line}") + self.state = state.NORMAL + + if self.config.verbose: + self.emit_msg(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + warning=False) + # + # Failed to find an identifier. Emit a warning + # + else: + self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") + + # + # Helper function to determine if a new section is being started. + # + def is_new_section(self, ln, line): + if doc_sect.search(line): + self.state = state.BODY + # + # Pick out the name of our new section, tweaking it if need be. + # + newsection = doc_sect.group(1) + if newsection.lower() == 'description': + newsection = 'Description' + elif newsection.lower() == 'context': + newsection = 'Context' + self.state = state.SPECIAL_SECTION + elif newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + self.state = state.SPECIAL_SECTION + elif newsection[0] == '@': + self.state = state.SPECIAL_SECTION + # + # Initialize the contents, and get the new section going. + # + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + self.dump_section() + self.entry.begin_section(ln, newsection) + self.entry.leading_space = None + + self.entry.add_text(newcontents.lstrip()) + return True + return False + + # + # Helper function to detect (and effect) the end of a kerneldoc comment. + # + def is_comment_end(self, ln, line): + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + + doc_end: + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') + if r.match(line): + self.emit_msg(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = state.PROTO + return True + return False + + + def process_decl(self, ln, line): + """ + STATE_DECLARATION: We've seen the beginning of a declaration + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # Look for anything with the " * " line beginning. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # A blank line means that we have moved out of the declaration + # part of the comment (without any "special section" parameter + # descriptions). + # + if cont == "": + self.state = state.BODY + # + # Otherwise we have more of the declaration section to soak up. + # + else: + self.entry.declaration_purpose = \ + trim_whitespace(self.entry.declaration_purpose + ' ' + cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + + def process_special(self, ln, line): + """ + STATE_SPECIAL_SECTION: a section ending with a blank line + """ + # + # If we have hit a blank line (only the " * " marker), then this + # section is done. + # + if KernRe(r"\s*\*\s*$").match(line): + self.entry.begin_section(ln, dump = True) + self.state = state.BODY + return + # + # Not a blank line, look for the other ways to end the section. + # + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # OK, we should have a continuation of the text for this section. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # If the lines of text after the first in a special section have + # leading white space, we need to trim it out or Sphinx will get + # confused. For the second line (the None case), see what we + # find there and remember it. + # + if self.entry.leading_space is None: + r = KernRe(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + # + # Otherwise, before trimming any leading chars, be *sure* + # that they are white space. We should maybe warn if this + # isn't the case. + # + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + self.entry.leading_space = i + break + # + # Add the trimmed result to the section and we're done. + # + self.entry.add_text(cont[self.entry.leading_space:]) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_body(self, ln, line): + """ + STATE_BODY: the bulk of a kerneldoc comment. + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + + if doc_content.search(line): + cont = doc_content.group(1) + self.entry.add_text(cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_inline_name(self, ln, line): + """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" + + if doc_inline_sect.search(line): + self.entry.begin_section(ln, doc_inline_sect.group(1)) + self.entry.add_text(doc_inline_sect.group(2).lstrip()) + self.state = state.INLINE_TEXT + elif doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") + self.state = state.PROTO + # else ... ?? + + def process_inline_text(self, ln, line): + """STATE_INLINE_TEXT: docbook comments within a prototype.""" + + if doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.entry.add_text(doc_content.group(1)) + # else ... ?? + + def syscall_munge(self, ln, proto): # pylint: disable=W0613 + """ + Handle syscall definitions + """ + + is_void = False + + # Strip newlines/CR's + proto = re.sub(r'[\r\n]+', ' ', proto) + + # Check if it's a SYSCALL_DEFINE0 + if 'SYSCALL_DEFINE0' in proto: + is_void = True + + # Replace SYSCALL_DEFINE with correct return type & function name + proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + + r = KernRe(r'long\s+(sys_.*?),') + if r.search(proto): + proto = KernRe(',').sub('(', proto, count=1) + elif is_void: + proto = KernRe(r'\)').sub('(void)', proto, count=1) + + # Now delete all of the odd-numbered commas in the proto + # so that argument types & names don't have a comma between them + count = 0 + length = len(proto) + + if is_void: + length = 0 # skip the loop if is_void + + for ix in range(length): + if proto[ix] == ',': + count += 1 + if count % 2 == 1: + proto = proto[:ix] + ' ' + proto[ix + 1:] + + return proto + + def tracepoint_munge(self, ln, proto): + """ + Handle tracepoint definitions + """ + + tracepointname = None + tracepointargs = None + + # Match tracepoint name based on different patterns + r = KernRe(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname = r.group(2) + + if tracepointname: + tracepointname = tracepointname.lstrip() + + r = KernRe(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs = r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_msg(ln, + f"Unrecognized tracepoint format:\n{proto}\n") + else: + proto = f"static inline void trace_{tracepointname}({tracepointargs})" + self.entry.identifier = f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype""" + + # strip C99-style comments to end of line + line = KernRe(r"//.*$", re.S).sub('', line) + # + # Soak up the line's worth of prototype text, stopping at { or ; if present. + # + if KernRe(r'\s*#\s*define').match(line): + self.entry.prototype = line + elif not line.startswith('#'): # skip other preprocessor stuff + r = KernRe(r'([^\{]*)') + if r.match(line): + self.entry.prototype += r.group(1) + " " + # + # If we now have the whole prototype, clean it up and declare victory. + # + if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): + # strip comments and surrounding spaces + self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() + # + # Handle self.entry.prototypes for function pointers like: + # int (*pcs_config)(struct foo) + # by turning it into + # int pcs_config(struct foo) + # + r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') + self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) + # + # Handle special declaration syntaxes + # + if 'SYSCALL_DEFINE' in self.entry.prototype: + self.entry.prototype = self.syscall_munge(ln, + self.entry.prototype) + else: + r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype = self.tracepoint_munge(ln, + self.entry.prototype) + # + # ... and we're done + # + self.dump_function(ln, self.entry.prototype) + self.reset_state(ln) + + def process_proto_type(self, ln, line): + """Ancillary routine to process a type""" + + # Strip C99-style comments and surrounding whitespace + line = KernRe(r"//.*$", re.S).sub('', line).strip() + if not line: + return # nothing to see here + + # To distinguish preprocessor directive from regular declaration later. + if line.startswith('#'): + line += ";" + # + # Split the declaration on any of { } or ;, and accumulate pieces + # until we hit a semicolon while not inside {brackets} + # + r = KernRe(r'(.*?)([{};])') + for chunk in r.split(line): + if chunk: # Ignore empty matches + self.entry.prototype += chunk + # + # This cries out for a match statement ... someday after we can + # drop Python 3.9 ... + # + if chunk == '{': + self.entry.brcount += 1 + elif chunk == '}': + self.entry.brcount -= 1 + elif chunk == ';' and self.entry.brcount <= 0: + self.dump_declaration(ln, self.entry.prototype) + self.reset_state(ln) + return + # + # We hit the end of the line while still in the declaration; put + # in a space to represent the newline. + # + self.entry.prototype += ' ' + + def process_proto(self, ln, line): + """STATE_PROTO: reading a function/whatever prototype.""" + + if doc_inline_oneline.search(line): + self.entry.begin_section(ln, doc_inline_oneline.group(1)) + self.entry.add_text(doc_inline_oneline.group(2)) + self.dump_section() + + elif doc_inline_start.search(line): + self.state = state.INLINE_NAME + + elif self.entry.decl_type == 'function': + self.process_proto_function(ln, line) + + else: + self.process_proto_type(ln, line) + + def process_docblock(self, ln, line): + """STATE_DOCBLOCK: within a DOC: block.""" + + if doc_end.search(line): + self.dump_section() + self.output_declaration("doc", self.entry.identifier) + self.reset_state(ln) + + elif doc_content.search(line): + self.entry.add_text(doc_content.group(1)) + + def parse_export(self): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + + for line in fp: + self.process_export(export_table, line) + + except IOError: + return None + + return export_table + + # + # The state/action table telling us which function to invoke in + # each state. + # + state_actions = { + state.NORMAL: process_normal, + state.NAME: process_name, + state.BODY: process_body, + state.DECLARATION: process_decl, + state.SPECIAL_SECTION: process_special, + state.INLINE_NAME: process_inline_name, + state.INLINE_TEXT: process_inline_text, + state.PROTO: process_proto, + state.DOCBLOCK: process_docblock, + } + + def parse_kdoc(self): + """ + Open and process each line of a C source file. + The parsing is controlled via a state machine, and the line is passed + to a different process function depending on the state. The process + function may update the state as needed. + + Besides parsing kernel-doc tags, it also parses export symbols. + """ + + prev = "" + prev_ln = None + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + for ln, line in enumerate(fp): + + line = line.expandtabs().strip("\n") + + # Group continuation lines on prototypes + if self.state == state.PROTO: + if line.endswith("\\"): + prev += line.rstrip("\\") + if not prev_ln: + prev_ln = ln + continue + + if prev: + ln = prev_ln + line = prev + line + prev = "" + prev_ln = None + + self.config.log.debug("%d %s: %s", + ln, state.name[self.state], + line) + + # This is an optimization over the original script. + # There, when export_file was used for the same file, + # it was read twice. Here, we use the already-existing + # loop to parse exported symbols as well. + # + if (self.state != state.NORMAL) or \ + not self.process_export(export_table, line): + # Hand this line to the appropriate state handler + self.state_actions[self.state](self, ln, line) + + except OSError: + self.config.log.error(f"Error: Cannot open file {self.fname}") + + return export_table, self.entries diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py new file mode 100644 index 000000000000..612223e1e723 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . + +""" +Regular expression ancillary classes. + +Those help caching regular expressions and do matching for kernel-doc. +""" + +import re + +# Local cache for regular expressions +re_cache = {} + + +class KernRe: + """ + Helper class to simplify regex declaration and usage, + + It calls re.compile for a given pattern. It also allows adding + regular expressions and define sub at class init time. + + Regular expressions can be cached via an argument, helping to speedup + searches. + """ + + def _add_regex(self, string, flags): + """ + Adds a new regex or re-use it from the cache. + """ + self.regex = re_cache.get(string, None) + if not self.regex: + self.regex = re.compile(string, flags=flags) + if self.cache: + re_cache[string] = self.regex + + def __init__(self, string, cache=True, flags=0): + """ + Compile a regular expression and initialize internal vars. + """ + + self.cache = cache + self.last_match = None + + self._add_regex(string, flags) + + def __str__(self): + """ + Return the regular expression pattern. + """ + return self.regex.pattern + + def __add__(self, other): + """ + Allows adding two regular expressions into one. + """ + + return KernRe(str(self) + str(other), cache=self.cache or other.cache, + flags=self.regex.flags | other.regex.flags) + + def match(self, string): + """ + Handles a re.match storing its results + """ + + self.last_match = self.regex.match(string) + return self.last_match + + def search(self, string): + """ + Handles a re.search storing its results + """ + + self.last_match = self.regex.search(string) + return self.last_match + + def findall(self, string): + """ + Alias to re.findall + """ + + return self.regex.findall(string) + + def split(self, string): + """ + Alias to re.split + """ + + return self.regex.split(string) + + def sub(self, sub, string, count=0): + """ + Alias to re.sub + """ + + return self.regex.sub(sub, string, count=count) + + def group(self, num): + """ + Returns the group results of the last match + """ + + return self.last_match.group(num) + + +class NestedMatch: + """ + Finding nested delimiters is hard with regular expressions. It is + even harder on Python with its normal re module, as there are several + advanced regular expressions that are missing. + + This is the case of this pattern: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parenthesis of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + # TODO: make NestedMatch handle multiple match groups + # + # Right now, regular expressions to match it are defined only up to + # the start delimiter, e.g.: + # + # \bSTRUCT_GROUP\( + # + # is similar to: STRUCT_GROUP\((.*)\) + # except that the content inside the match group is delimiter's aligned. + # + # The content inside parenthesis are converted into a single replace + # group (e.g. r`\1'). + # + # It would be nice to change such definition to support multiple + # match groups, allowing a regex equivalent to. + # + # FOO\((.*), (.*), (.*)\) + # + # it is probably easier to define it not as a regular expression, but + # with some lexical definition like: + # + # FOO(arg1, arg2, arg3) + + DELIMITER_PAIRS = { + '{': '}', + '(': ')', + '[': ']', + } + + RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') + + def _search(self, regex, line): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and place them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. + + The algorithm shoud work fine for properly paired lines, but will + silently ignore end delimiters that preceeds an start delimiter. + This should be OK for kernel-doc parser, as unaligned delimiters + would cause compilation errors. So, we don't need to rise exceptions + to cover such issues. + """ + + stack = [] + + for match_re in regex.finditer(line): + start = match_re.start() + offset = match_re.end() + + d = line[offset - 1] + if d not in self.DELIMITER_PAIRS: + continue + + end = self.DELIMITER_PAIRS[d] + stack.append(end) + + for match in self.RE_DELIM.finditer(line[offset:]): + pos = match.start() + offset + + d = line[pos] + + if d in self.DELIMITER_PAIRS: + end = self.DELIMITER_PAIRS[d] + + stack.append(end) + continue + + # Does the end delimiter match what it is expected? + if stack and d == stack[-1]: + stack.pop() + + if not stack: + yield start, offset, pos + 1 + break + + def search(self, regex, line): + """ + This is similar to re.search: + + It matches a regex that it is followed by a delimiter, + returning occurrences only if all delimiters are paired. + """ + + for t in self._search(regex, line): + + yield line[t[0]:t[2]] + + def sub(self, regex, sub, line, count=0): + """ + This is similar to re.sub: + + It matches a regex that it is followed by a delimiter, + replacing occurrences only if all delimiters are paired. + + if r'\1' is used, it works just like re: it places there the + matched paired data with the delimiter stripped. + + If count is different than zero, it will replace at most count + items. + """ + out = "" + + cur_pos = 0 + n = 0 + + for start, end, pos in self._search(regex, line): + out += line[cur_pos:start] + + # Value, ignoring start/end delimiters + value = line[end:pos - 1] + + # replaces \1 at the sub string, if \1 is used there + new_sub = sub + new_sub = new_sub.replace(r'\1', value) + + out += new_sub + + # Drop end ';' if any + if line[pos] == ';': + pos += 1 + + cur_pos = pos + n += 1 + + if count and count >= n: + break + + # Append the remaining string + l = len(line) + out += line[cur_pos:l] + + return out diff --git a/tools/lib/python/kdoc/latex_fonts.py b/tools/lib/python/kdoc/latex_fonts.py new file mode 100755 index 000000000000..29317f8006ea --- /dev/null +++ b/tools/lib/python/kdoc/latex_fonts.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) Akira Yokosawa, 2024 +# +# Ported to Python by (c) Mauro Carvalho Chehab, 2025 + +""" +Detect problematic Noto CJK variable fonts. + +For "make pdfdocs", reports of build errors of translations.pdf started +arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE +tumbleweed have started deploying variable-font [3] format of "Noto CJK" +fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK +(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which +does not (and likely never will) understand variable fonts for historical +reasons. + +The build error happens even when both of variable- and non-variable-format +fonts are found on the build system. To make matters worse, Fedora enlists +variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, +-zh_TW, etc. Hence developers who have interest in CJK pages are more +likely to encounter the build errors. + +This script is invoked from the error path of "make pdfdocs" and emits +suggestions if variable-font files of "Noto CJK" fonts are in the list of +fonts accessible from XeTeX. + +References: +[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ +[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ +[3]: https://en.wikipedia.org/wiki/Variable_font +[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts +[5]: https://build.opensuse.org/request/show/1157217 + +#=========================================================================== +Workarounds for building translations.pdf +#=========================================================================== + +* Denylist "variable font" Noto CJK fonts. + - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with + tweaks if necessary. Remove leading "". + - Path of fontconfig/fonts.conf can be overridden by setting an env + variable FONTS_CONF_DENY_VF. + + * Template: +----------------------------------------------------------------- + + + + + + + + /usr/share/fonts/google-noto-*-cjk-vf-fonts + + /usr/share/fonts/truetype/Noto*CJK*-VF.otf + + + +----------------------------------------------------------------- + + The denylisting is activated for "make pdfdocs". + +* For skipping CJK pages in PDF + - Uninstall texlive-xecjk. + Denylisting is not needed in this case. + +* For printing CJK pages in PDF + - Need non-variable "Noto CJK" fonts. + * Fedora + - google-noto-sans-cjk-fonts + - google-noto-serif-cjk-fonts + * openSUSE tumbleweed + - Non-variable "Noto CJK" fonts are not available as distro packages + as of April, 2024. Fetch a set of font files from upstream Noto + CJK Font released at: + https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc + and at: + https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc + , then uncompress and deploy them. + - Remember to update fontconfig cache by running fc-cache. + +!!! Caution !!! + Uninstalling "variable font" packages can be dangerous. + They might be depended upon by other packages important for your work. + Denylisting should be less invasive, as it is effective only while + XeLaTeX runs in "make pdfdocs". +""" + +import os +import re +import subprocess +import textwrap +import sys + +class LatexFontChecker: + """ + Detect problems with CJK variable fonts that affect PDF builds for + translations. + """ + + def __init__(self, deny_vf=None): + if not deny_vf: + deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") + + self.environ = os.environ.copy() + self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) + + self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") + + def description(self): + return __doc__ + + def get_noto_cjk_vf_fonts(self): + """Get Noto CJK fonts""" + + cjk_fonts = set() + cmd = ["fc-list", ":", "file", "family", "variable"] + try: + result = subprocess.run(cmd,stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + env=self.environ, + check=True) + + except subprocess.CalledProcessError as exc: + sys.exit(f"Error running fc-list: {repr(exc)}") + + for line in result.stdout.splitlines(): + if 'variable=True' not in line: + continue + + match = self.re_cjk.search(line) + if match: + cjk_fonts.add(match.group(1)) + + return sorted(cjk_fonts) + + def check(self): + """Check for problems with CJK fonts""" + + fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") + if not fonts: + return None + + rel_file = os.path.relpath(__file__, os.getcwd()) + + msg = "=" * 77 + "\n" + msg += 'XeTeX is confused by "variable font" files listed below:\n' + msg += fonts + "\n" + msg += textwrap.dedent(f""" + For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. + Or, CJK pages can be skipped by uninstalling texlive-xecjk. + + For more info on denylisting, other options, and variable font, run: + + tools/docs/check-variable-fonts.py -h + """) + msg += "=" * 77 + + return msg diff --git a/tools/lib/python/kdoc/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py new file mode 100755 index 000000000000..25361996cd20 --- /dev/null +++ b/tools/lib/python/kdoc/parse_data_structs.py @@ -0,0 +1,482 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2016-2025 by Mauro Carvalho Chehab . +# pylint: disable=R0912,R0915 + +""" +Parse a source file or header, creating ReStructured Text cross references. + +It accepts an optional file to change the default symbol reference or to +suppress symbols from the output. + +It is capable of identifying defines, functions, structs, typedefs, +enums and enum symbols and create cross-references for all of them. +It is also capable of distinguish #define used for specifying a Linux +ioctl. + +The optional rules file contains a set of rules like: + + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` +""" + +import os +import re +import sys + + +class ParseDataStructs: + """ + Creates an enriched version of a Kernel header file with cross-links + to each C data structure type. + + It is meant to allow having a more comprehensive documentation, where + uAPI headers will create cross-reference links to the code. + + It is capable of identifying defines, functions, structs, typedefs, + enums and enum symbols and create cross-references for all of them. + It is also capable of distinguish #define used for specifying a Linux + ioctl. + + By default, it create rules for all symbols and defines, but it also + allows parsing an exception file. Such file contains a set of rules + using the syntax below: + + 1. Ignore rules: + + ignore ` + + Removes the symbol from reference generation. + + 2. Replace rules: + + replace + + Replaces how old_symbol with a new reference. The new_reference can be: + + - A simple symbol name; + - A full Sphinx reference. + + 3. Namespace rules + + namespace + + Sets C namespace to be used during cross-reference generation. Can + be overridden by replace rules. + + On ignore and replace rules, can be: + - ioctl: for defines that end with _IO*, e.g. ioctl definitions + - define: for other defines + - symbol: for symbols defined within enums; + - typedef: for typedefs; + - enum: for the name of a non-anonymous enum; + - struct: for structs. + + Examples: + + ignore define __LINUX_MEDIA_H + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` + + namespace MC + """ + + # Parser regexes with multiple ways to capture enums and structs + RE_ENUMS = [ + re.compile(r"^\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*enum\s+([\w_]+)\s*$"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"), + ] + RE_STRUCTS = [ + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"), + ] + + # FIXME: the original code was written a long time before Sphinx C + # domain to have multiple namespaces. To avoid to much turn at the + # existing hyperlinks, the code kept using "c:type" instead of the + # right types. To change that, we need to change the types not only + # here, but also at the uAPI media documentation. + DEF_SYMBOL_TYPES = { + "ioctl": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + "description": "IOCTL Commands", + }, + "define": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + "description": "Macros and Definitions", + }, + # We're calling each definition inside an enum as "symbol" + "symbol": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + "description": "Enumeration values", + }, + "typedef": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + "description": "Type Definitions", + }, + # This is the description of the enum itself + "enum": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + "description": "Enumerations", + }, + "struct": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + "description": "Structures", + }, + } + + def __init__(self, debug: bool = False): + """Initialize internal vars""" + self.debug = debug + self.data = "" + + self.symbols = {} + + self.namespace = None + self.ignore = [] + self.replace = [] + + for symbol_type in self.DEF_SYMBOL_TYPES: + self.symbols[symbol_type] = {} + + def read_exceptions(self, fname: str): + if not fname: + return + + name = os.path.basename(fname) + + with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f: + for ln, line in enumerate(f): + ln += 1 + line = line.strip() + if not line or line.startswith("#"): + continue + + # ignore rules + match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line) + + if match: + self.ignore.append((ln, match.group(1), match.group(2))) + continue + + # replace rules + match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line) + if match: + self.replace.append((ln, match.group(1), match.group(2), + match.group(3))) + continue + + match = re.match(r"^namespace\s+(\S+)", line) + if match: + self.namespace = match.group(1) + continue + + sys.exit(f"{name}:{ln}: invalid line: {line}") + + def apply_exceptions(self): + """ + Process exceptions file with rules to ignore or replace references. + """ + + # Handle ignore rules + for ln, c_type, symbol in self.ignore: + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + d = self.symbols[c_type] + if symbol in d: + del d[symbol] + + # Handle replace rules + for ln, c_type, old, new in self.replace: + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + reftype = None + + # Parse reference type when the type is specified + + match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new) + if match: + reftype = f":c:{match.group(1)}" + new = match.group(2) + else: + match = re.search(r"(\:ref)\:\`(.+)\`", new) + if match: + reftype = match.group(1) + new = match.group(2) + + # If the replacement rule doesn't have a type, get default + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type") + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type") + + new_ref = f"{reftype}:`{old} <{new}>`" + + # Change self.symbols to use the replacement rule + if old in self.symbols[c_type]: + (_, ln) = self.symbols[c_type][old] + self.symbols[c_type][old] = (new_ref, ln) + else: + print(f"{name}:{ln}: Warning: can't find {old} {c_type}") + + def store_type(self, ln, symbol_type: str, symbol: str, + ref_name: str = None, replace_underscores: bool = True): + """ + Stores a new symbol at self.symbols under symbol_type. + + By default, underscores are replaced by "-" + """ + defs = self.DEF_SYMBOL_TYPES[symbol_type] + + prefix = defs.get("prefix", "") + suffix = defs.get("suffix", "") + ref_type = defs.get("ref_type") + + # Determine ref_link based on symbol type + if ref_type or self.namespace: + if not ref_name: + ref_name = symbol.lower() + + # c-type references don't support hash + if ref_type == ":ref" and replace_underscores: + ref_name = ref_name.replace("_", "-") + + # C domain references may have namespaces + if ref_type.startswith(":c:"): + if self.namespace: + ref_name = f"{self.namespace}.{ref_name}" + + if ref_type: + ref_link = f"{ref_type}:`{symbol} <{ref_name}>`" + else: + ref_link = f"`{symbol} <{ref_name}>`" + else: + ref_link = symbol + + self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln) + + def store_line(self, line): + """Stores a line at self.data, properly indented""" + line = " " + line.expandtabs() + self.data += line.rstrip(" ") + + def parse_file(self, file_in: str, exceptions: str = None): + """Reads a C source file and get identifiers""" + self.data = "" + is_enum = False + is_comment = False + multiline = "" + + self.read_exceptions(exceptions) + + with open(file_in, "r", + encoding="utf-8", errors="backslashreplace") as f: + for line_no, line in enumerate(f): + self.store_line(line) + line = line.strip("\n") + + # Handle continuation lines + if line.endswith(r"\\"): + multiline += line[-1] + continue + + if multiline: + line = multiline + line + multiline = "" + + # Handle comments. They can be multilined + if not is_comment: + if re.search(r"/\*.*", line): + is_comment = True + else: + # Strip C99-style comments + line = re.sub(r"(//.*)", "", line) + + if is_comment: + if re.search(r".*\*/", line): + is_comment = False + else: + multiline = line + continue + + # At this point, line variable may be a multilined statement, + # if lines end with \ or if they have multi-line comments + # With that, it can safely remove the entire comments, + # and there's no need to use re.DOTALL for the logic below + + line = re.sub(r"(/\*.*\*/)", "", line) + if not line.strip(): + continue + + # It can be useful for debug purposes to print the file after + # having comments stripped and multi-lines grouped. + if self.debug > 1: + print(f"line {line_no + 1}: {line}") + + # Now the fun begins: parse each type and store it. + + # We opted for a two parsing logic here due to: + # 1. it makes easier to debug issues not-parsed symbols; + # 2. we want symbol replacement at the entire content, not + # just when the symbol is detected. + + if is_enum: + match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line) + if match: + self.store_type(line_no, "symbol", match.group(1)) + if "}" in line: + is_enum = False + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line) + if match: + self.store_type(line_no, "ioctl", match.group(1), + replace_underscores=False) + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line) + if match: + self.store_type(line_no, "define", match.group(1)) + continue + + match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);", + line) + if match: + name = match.group(2).strip() + symbol = match.group(3) + self.store_type(line_no, "typedef", symbol, ref_name=name) + continue + + for re_enum in self.RE_ENUMS: + match = re_enum.match(line) + if match: + self.store_type(line_no, "enum", match.group(1)) + is_enum = True + break + + for re_struct in self.RE_STRUCTS: + match = re_struct.match(line) + if match: + self.store_type(line_no, "struct", match.group(1)) + break + + self.apply_exceptions() + + def debug_print(self): + """ + Print debug information containing the replacement rules per symbol. + To make easier to check, group them per type. + """ + if not self.debug: + return + + for c_type, refs in self.symbols.items(): + if not refs: # Skip empty dictionaries + continue + + print(f"{c_type}:") + + for symbol, (ref, ln) in sorted(refs.items()): + print(f" #{ln:<5d} {symbol} -> {ref}") + + print() + + def gen_output(self): + """Write the formatted output to a file.""" + + # Avoid extra blank lines + text = re.sub(r"\s+$", "", self.data) + "\n" + text = re.sub(r"\n\s+\n", "\n\n", text) + + # Escape Sphinx special characters + text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text) + + # Source uAPI files may have special notes. Use bold font for them + text = re.sub(r"DEPRECATED", "**DEPRECATED**", text) + + # Delimiters to catch the entire symbol after escaped + start_delim = r"([ \n\t\(=\*\@])" + end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)" + + # Process all reference types + for ref_dict in self.symbols.values(): + for symbol, (replacement, _) in ref_dict.items(): + symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol)) + text = re.sub(fr'{start_delim}{symbol}{end_delim}', + fr'\1{replacement}\2', text) + + # Remove "\ " where not needed: before spaces and at the end of lines + text = re.sub(r"\\ ([\n ])", r"\1", text) + text = re.sub(r" \\ ", " ", text) + + return text + + def gen_toc(self): + """ + Create a list of symbols to be part of a TOC contents table + """ + text = [] + + # Sort symbol types per description + symbol_descriptions = [] + for k, v in self.DEF_SYMBOL_TYPES.items(): + symbol_descriptions.append((v['description'], k)) + + symbol_descriptions.sort() + + # Process each category + for description, c_type in symbol_descriptions: + + refs = self.symbols[c_type] + if not refs: # Skip empty categories + continue + + text.append(f"{description}") + text.append("-" * len(description)) + text.append("") + + # Sort symbols alphabetically + for symbol, (ref, ln) in sorted(refs.items()): + text.append(f"- LINENO_{ln}: {ref}") + + text.append("") # Add empty line between categories + + return "\n".join(text) + + def write_output(self, file_in: str, file_out: str, toc: bool): + title = os.path.basename(file_in) + + if toc: + text = self.gen_toc() + else: + text = self.gen_output() + + with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f: + f.write(".. -*- coding: utf-8; mode: rst -*-\n\n") + f.write(f"{title}\n") + f.write("=" * len(title) + "\n\n") + + if not toc: + f.write(".. parsed-literal::\n\n") + + f.write(text) diff --git a/tools/lib/python/kdoc/python_version.py b/tools/lib/python/kdoc/python_version.py new file mode 100644 index 000000000000..4fde1b882164 --- /dev/null +++ b/tools/lib/python/kdoc/python_version.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (c) 2017-2025 Mauro Carvalho Chehab + +""" +Handle Python version check logic. + +Not all Python versions are supported by scripts. Yet, on some cases, +like during documentation build, a newer version of python could be +available. + +This class allows checking if the minimal requirements are followed. + +Better than that, PythonVersion.check_python() not only checks the minimal +requirements, but it automatically switches to a the newest available +Python version if present. + +""" + +import os +import re +import subprocess +import shlex +import sys + +from glob import glob +from textwrap import indent + +class PythonVersion: + """ + Ancillary methods that checks for missing dependencies for different + types of types, like binaries, python modules, rpm deps, etc. + """ + + def __init__(self, version): + """Ïnitialize self.version tuple from a version string""" + self.version = self.parse_version(version) + + @staticmethod + def parse_version(version): + """Convert a major.minor.patch version into a tuple""" + return tuple(int(x) for x in version.split(".")) + + @staticmethod + def ver_str(version): + """Returns a version tuple as major.minor.patch""" + return ".".join([str(x) for x in version]) + + @staticmethod + def cmd_print(cmd, max_len=80): + cmd_line = [] + + for w in cmd: + w = shlex.quote(w) + + if cmd_line: + if not max_len or len(cmd_line[-1]) + len(w) < max_len: + cmd_line[-1] += " " + w + continue + else: + cmd_line[-1] += " \\" + cmd_line.append(w) + else: + cmd_line.append(w) + + return "\n ".join(cmd_line) + + def __str__(self): + """Returns a version tuple as major.minor.patch from self.version""" + return self.ver_str(self.version) + + @staticmethod + def get_python_version(cmd): + """ + Get python version from a Python binary. As we need to detect if + are out there newer python binaries, we can't rely on sys.release here. + """ + + kwargs = {} + if sys.version_info < (3, 7): + kwargs['universal_newlines'] = True + else: + kwargs['text'] = True + + result = subprocess.run([cmd, "--version"], + stdout = subprocess.PIPE, + stderr = subprocess.PIPE, + **kwargs, check=False) + + version = result.stdout.strip() + + match = re.search(r"(\d+\.\d+\.\d+)", version) + if match: + return PythonVersion.parse_version(match.group(1)) + + print(f"Can't parse version {version}") + return (0, 0, 0) + + @staticmethod + def find_python(min_version): + """ + Detect if are out there any python 3.xy version newer than the + current one. + + Note: this routine is limited to up to 2 digits for python3. We + may need to update it one day, hopefully on a distant future. + """ + patterns = [ + "python3.[0-9][0-9]", + "python3.[0-9]", + ] + + python_cmd = [] + + # Seek for a python binary newer than min_version + for path in os.getenv("PATH", "").split(":"): + for pattern in patterns: + for cmd in glob(os.path.join(path, pattern)): + if os.path.isfile(cmd) and os.access(cmd, os.X_OK): + version = PythonVersion.get_python_version(cmd) + if version >= min_version: + python_cmd.append((version, cmd)) + + return sorted(python_cmd, reverse=True) + + @staticmethod + def check_python(min_version, show_alternatives=False, bail_out=False, + success_on_error=False): + """ + Check if the current python binary satisfies our minimal requirement + for Sphinx build. If not, re-run with a newer version if found. + """ + cur_ver = sys.version_info[:3] + if cur_ver >= min_version: + ver = PythonVersion.ver_str(cur_ver) + return + + python_ver = PythonVersion.ver_str(cur_ver) + + available_versions = PythonVersion.find_python(min_version) + if not available_versions: + print(f"ERROR: Python version {python_ver} is not spported anymore\n") + print(" Can't find a new version. This script may fail") + return + + script_path = os.path.abspath(sys.argv[0]) + + # Check possible alternatives + if available_versions: + new_python_cmd = available_versions[0][1] + else: + new_python_cmd = None + + if show_alternatives and available_versions: + print("You could run, instead:") + for _, cmd in available_versions: + args = [cmd, script_path] + sys.argv[1:] + + cmd_str = indent(PythonVersion.cmd_print(args), " ") + print(f"{cmd_str}\n") + + if bail_out: + msg = f"Python {python_ver} not supported. Bailing out" + if success_on_error: + print(msg, file=sys.stderr) + sys.exit(0) + else: + sys.exit(msg) + + print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") + + # Restart script using the newer version + args = [new_python_cmd, script_path] + sys.argv[1:] + + try: + os.execv(new_python_cmd, args) + except OSError as e: + sys.exit(f"Failed to restart with {new_python_cmd}: {e}") -- cgit v1.2.3 From 992a9df41ad7173588bf90e15b33d45db2811aea Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 10 Nov 2025 15:04:30 -0700 Subject: docs: bring some order to our Python module hierarchy Now that we have tools/lib/python for our Python modules, turn them into proper packages with a single namespace so that everything can just use tools/lib/python in sys.path. No functional change. Signed-off-by: Jonathan Corbet Message-ID: <20251110220430.726665-3-corbet@lwn.net> --- Documentation/sphinx/kernel_abi.py | 4 ++-- Documentation/sphinx/kernel_include.py | 4 ++-- Documentation/sphinx/kerneldoc.py | 6 +++--- scripts/kernel-doc.py | 6 +++--- tools/docs/check-variable-fonts.py | 4 ++-- tools/docs/get_abi.py | 10 +++++----- tools/docs/parse-headers.py | 6 +++--- tools/docs/sphinx-build-wrapper | 5 ++--- tools/docs/sphinx-pre-install | 4 ++-- tools/lib/python/__init__.py | 0 tools/lib/python/abi/__init__.py | 0 tools/lib/python/abi/abi_parser.py | 2 +- tools/lib/python/abi/abi_regex.py | 4 ++-- tools/lib/python/abi/system_symbols.py | 2 +- tools/lib/python/kdoc/__init__.py | 0 tools/lib/python/kdoc/kdoc_files.py | 4 ++-- tools/lib/python/kdoc/kdoc_output.py | 4 ++-- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 18 files changed, 34 insertions(+), 35 deletions(-) create mode 100644 tools/lib/python/__init__.py create mode 100644 tools/lib/python/abi/__init__.py create mode 100644 tools/lib/python/kdoc/__init__.py (limited to 'scripts') diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index 7ec832da8444..5667f207d175 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -43,9 +43,9 @@ from sphinx.util.docutils import switch_source_input from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/lib/python/abi")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python")) -from abi_parser import AbiParser +from abi.abi_parser import AbiParser __version__ = "1.0" diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py index a12455daa6d7..626762ff6af3 100755 --- a/Documentation/sphinx/kernel_include.py +++ b/Documentation/sphinx/kernel_include.py @@ -97,9 +97,9 @@ from docutils.parsers.rst.directives.body import CodeBlock, NumberLines from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python")) -from parse_data_structs import ParseDataStructs +from kdoc.parse_data_structs import ParseDataStructs __version__ = "1.0" logger = logging.getLogger(__name__) diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index 56f382a6bdf1..d8cdf068ef35 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -42,10 +42,10 @@ from sphinx.util import logging from pprint import pformat srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python")) -from kdoc_files import KernelFiles -from kdoc_output import RestFormat +from kdoc.kdoc_files import KernelFiles +from kdoc.kdoc_output import RestFormat __version__ = '1.0' kfiles = None diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index bb24bbf73167..7a1eaf986bcd 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -111,7 +111,7 @@ import sys # Import Python modules -LIB_DIR = "../tools/lib/python/kdoc" +LIB_DIR = "../tools/lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) @@ -292,8 +292,8 @@ def main(): logger.warning("Python 3.7 or later is required for correct results") # Import kernel-doc libraries only after checking Python version - from kdoc_files import KernelFiles # pylint: disable=C0415 - from kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 + from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 + from kdoc.kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 if args.man: out_style = ManFormat(modulename=args.modulename) diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py index c48bb05dad82..958d5a745724 100755 --- a/tools/docs/check-variable-fonts.py +++ b/tools/docs/check-variable-fonts.py @@ -17,9 +17,9 @@ import sys import os.path src_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) +sys.path.insert(0, os.path.join(src_dir, '../lib/python')) -from latex_fonts import LatexFontChecker +from kdoc.latex_fonts import LatexFontChecker checker = LatexFontChecker() diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py index e0abfe12fac7..2f0b99401f26 100755 --- a/tools/docs/get_abi.py +++ b/tools/docs/get_abi.py @@ -14,15 +14,15 @@ import sys # Import Python modules -LIB_DIR = "../lib/python/abi" +LIB_DIR = "../lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) -from abi_parser import AbiParser # pylint: disable=C0413 -from abi_regex import AbiRegex # pylint: disable=C0413 -from helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413 -from system_symbols import SystemSymbols # pylint: disable=C0413 +from abi.abi_parser import AbiParser # pylint: disable=C0413 +from abi.abi_regex import AbiRegex # pylint: disable=C0413 +from abi.helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413 +from abi.system_symbols import SystemSymbols # pylint: disable=C0413 # Command line classes diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py index ed9cf2bf22de..436acea4c6ca 100755 --- a/tools/docs/parse-headers.py +++ b/tools/docs/parse-headers.py @@ -28,9 +28,9 @@ import argparse, sys import os.path src_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) -from parse_data_structs import ParseDataStructs -from enrich_formatter import EnrichFormatter +sys.path.insert(0, os.path.join(src_dir, '../lib/python')) +from kdoc.parse_data_structs import ParseDataStructs +from kdoc.enrich_formatter import EnrichFormatter def main(): """Main function""" diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index ce0b1b5292da..d4943d952e2a 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -61,10 +61,9 @@ LIB_DIR = "../lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) -sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR + '/kdoc')) # temporary -from python_version import PythonVersion -from latex_fonts import LatexFontChecker +from kdoc.python_version import PythonVersion +from kdoc.latex_fonts import LatexFontChecker from jobserver import JobserverExec # pylint: disable=C0413,C0411,E0401 # diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install index d8c9fb76948d..965c9b093a41 100755 --- a/tools/docs/sphinx-pre-install +++ b/tools/docs/sphinx-pre-install @@ -35,8 +35,8 @@ from glob import glob import os.path src_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) -from python_version import PythonVersion +sys.path.insert(0, os.path.join(src_dir, '../lib/python')) +from kdoc.python_version import PythonVersion RECOMMENDED_VERSION = PythonVersion("3.4.3").version MIN_PYTHON_VERSION = PythonVersion("3.7").version diff --git a/tools/lib/python/__init__.py b/tools/lib/python/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/lib/python/abi/__init__.py b/tools/lib/python/abi/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py index 66a738013ce1..9b8db70067ef 100644 --- a/tools/lib/python/abi/abi_parser.py +++ b/tools/lib/python/abi/abi_parser.py @@ -17,7 +17,7 @@ from random import randrange, seed # Import Python modules -from helpers import AbiDebug, ABI_DIR +from abi.helpers import AbiDebug, ABI_DIR class AbiParser: diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py index 8a57846cbc69..d5553206de3c 100644 --- a/tools/lib/python/abi/abi_regex.py +++ b/tools/lib/python/abi/abi_regex.py @@ -12,8 +12,8 @@ import sys from pprint import pformat -from abi_parser import AbiParser -from helpers import AbiDebug +from abi.abi_parser import AbiParser +from abi.helpers import AbiDebug class AbiRegex(AbiParser): """Extends AbiParser to search ABI nodes with regular expressions""" diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py index f15c94a6e33c..4a2554da217b 100644 --- a/tools/lib/python/abi/system_symbols.py +++ b/tools/lib/python/abi/system_symbols.py @@ -15,7 +15,7 @@ from concurrent import futures from datetime import datetime from random import shuffle -from helpers import AbiDebug +from abi.helpers import AbiDebug class SystemSymbols: """Stores arguments for the class and initialize class vars""" diff --git a/tools/lib/python/kdoc/__init__.py b/tools/lib/python/kdoc/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 1fd8d17edb32..562cdf5261c3 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -13,8 +13,8 @@ import logging import os import re -from kdoc_parser import KernelDoc -from kdoc_output import OutputFormat +from kdoc.kdoc_parser import KernelDoc +from kdoc.kdoc_output import OutputFormat class GlobSourceFiles: diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 58f115059e93..14378953301b 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -19,8 +19,8 @@ import os import re from datetime import datetime -from kdoc_parser import KernelDoc, type_param -from kdoc_re import KernRe +from kdoc.kdoc_parser import KernelDoc, type_param +from kdoc.kdoc_re import KernRe function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index f7dbb0868367..c0cc714d4d6f 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -16,8 +16,8 @@ import sys import re from pprint import pformat -from kdoc_re import NestedMatch, KernRe -from kdoc_item import KdocItem +from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_item import KdocItem # # Regular expressions used to parse kernel-doc markups at KernelDoc class. -- cgit v1.2.3 From 464c7ea5c3ffa333a2c1a8dfd68b157ced1edc5a Mon Sep 17 00:00:00 2001 From: Carlos López Date: Fri, 31 Oct 2025 12:19:09 +0100 Subject: checkpatch: add IDR to the deprecated list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As of commit 85656ec193e9, the IDR interface is marked as deprecated in the documentation, but no checks are made in that regard for new code. Add the existing IDR initialization APIs to the deprecated list in checkpatch, so that if new code is introduced using these APIs, a warning is emitted. Link: https://lkml.kernel.org/r/20251031111908.2266077-2-clopez@suse.de Signed-off-by: Carlos López Suggested-by: Dan Williams Acked-by: Dan Williams Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Lukas Bulwahn Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'scripts') diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 6729f18e5654..d58ca9655ab7 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -860,6 +860,10 @@ our %deprecated_apis = ( "kunmap" => "kunmap_local", "kmap_atomic" => "kmap_local_page", "kunmap_atomic" => "kunmap_local", + #These should be enough to drive away new IDR users + "DEFINE_IDR" => "DEFINE_XARRAY", + "idr_init" => "xa_init", + "idr_init_base" => "xa_init_flags" ); #Create a search pattern for all these strings to speed up a loop below -- cgit v1.2.3 From caa71919a622e3f7d290d4f17ae538b15f5cb6d3 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 6 Nov 2025 13:43:41 +0100 Subject: scripts/gdb/radix-tree: add lx-radix-tree-command Patch series "scripts/gdb/symbols: make BPF debug info available to GDB", v2. This series greatly simplifies debugging BPF progs when using QEMU gdbstub by providing symbol names, sizes, and line numbers to GDB. Patch 1 adds radix tree iteration, which is necessary for parsing prog_idr. Patch 2 is the actual implementation; its description contains some details on how to use this. This patch (of 2): Add a function and a command to iterate over radix tree contents. Duplicate the C implementation in Python, but drop support for tagging. Link: https://lkml.kernel.org/r/20251106124600.86736-1-iii@linux.ibm.com Link: https://lkml.kernel.org/r/20251106124600.86736-2-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Cc: Alexander Gordeev Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkman Cc: Heiko Carstens Cc: Jan Kiszka Cc: Kieran Bingham Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- scripts/gdb/linux/radixtree.py | 139 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 132 insertions(+), 7 deletions(-) (limited to 'scripts') diff --git a/scripts/gdb/linux/radixtree.py b/scripts/gdb/linux/radixtree.py index 074543ac763d..bc2954e45c32 100644 --- a/scripts/gdb/linux/radixtree.py +++ b/scripts/gdb/linux/radixtree.py @@ -30,13 +30,16 @@ def entry_to_node(node): def node_maxindex(node): return (constants.LX_RADIX_TREE_MAP_SIZE << node['shift']) - 1 -def lookup(root, index): +def resolve_root(root): + if root.type == radix_tree_root_type.get_type(): + return root if root.type == radix_tree_root_type.get_type().pointer(): - node = root.dereference() - elif root.type != radix_tree_root_type.get_type(): - raise gdb.GdbError("must be {} not {}" - .format(radix_tree_root_type.get_type(), root.type)) + return root.dereference() + raise gdb.GdbError("must be {} not {}" + .format(radix_tree_root_type.get_type(), root.type)) +def lookup(root, index): + root = resolve_root(root) node = root['xa_head'] if node == 0: return None @@ -71,14 +74,120 @@ def lookup(root, index): return node -class LxRadixTree(gdb.Function): +def descend(parent, index): + offset = (index >> int(parent["shift"])) & constants.LX_RADIX_TREE_MAP_MASK + return offset, parent["slots"][offset] + +def load_root(root): + node = root["xa_head"] + nodep = node + + if is_internal_node(node): + node = entry_to_node(node) + maxindex = node_maxindex(node) + return int(node["shift"]) + constants.LX_RADIX_TREE_MAP_SHIFT, \ + nodep, maxindex + + return 0, nodep, 0 + +class RadixTreeIter: + def __init__(self, start): + self.index = 0 + self.next_index = start + self.node = None + +def xa_mk_internal(v): + return (v << 2) | 2 + +LX_XA_RETRY_ENTRY = xa_mk_internal(256) +LX_RADIX_TREE_RETRY = LX_XA_RETRY_ENTRY + +def next_chunk(root, iter): + mask = (1 << (utils.get_ulong_type().sizeof * 8)) - 1 + + index = iter.next_index + if index == 0 and iter.index != 0: + return None + + restart = True + while restart: + restart = False + + _, child, maxindex = load_root(root) + if index > maxindex: + return None + if not child: + return None + + if not is_internal_node(child): + iter.index = index + iter.next_index = (maxindex + 1) & mask + iter.node = None + return root["xa_head"].address + + while True: + node = entry_to_node(child) + offset, child = descend(node, index) + + if not child: + while True: + offset += 1 + if offset >= constants.LX_RADIX_TREE_MAP_SIZE: + break + slot = node["slots"][offset] + if slot: + break + index &= ~node_maxindex(node) + index = (index + (offset << int(node["shift"]))) & mask + if index == 0: + return None + if offset == constants.LX_RADIX_TREE_MAP_SIZE: + restart = True + break + child = node["slots"][offset] + + if not child: + restart = True + break + if child == LX_XA_RETRY_ENTRY: + break + if not node["shift"] or not is_internal_node(child): + break + + iter.index = (index & ~node_maxindex(node)) | offset + iter.next_index = ((index | node_maxindex(node)) + 1) & mask + iter.node = node + + return node["slots"][offset].address + +def next_slot(slot, iter): + mask = (1 << (utils.get_ulong_type().sizeof * 8)) - 1 + for _ in range(iter.next_index - iter.index - 1): + slot += 1 + iter.index = (iter.index + 1) & mask + if slot.dereference(): + return slot + return None + +def for_each_slot(root, start=0): + iter = RadixTreeIter(start) + slot = None + while True: + if not slot: + slot = next_chunk(root, iter) + if not slot: + break + yield iter.index, slot + slot = next_slot(slot, iter) + +class LxRadixTreeLookup(gdb.Function): """ Lookup and return a node from a RadixTree. $lx_radix_tree_lookup(root_node [, index]): Return the node at the given index. If index is omitted, the root node is dereference and returned.""" def __init__(self): - super(LxRadixTree, self).__init__("lx_radix_tree_lookup") + super(LxRadixTreeLookup, self).__init__("lx_radix_tree_lookup") def invoke(self, root, index=0): result = lookup(root, index) @@ -87,4 +196,20 @@ If index is omitted, the root node is dereference and returned.""" return result +class LxRadixTree(gdb.Command): + """Show all values stored in a RadixTree.""" + + def __init__(self): + super(LxRadixTree, self).__init__("lx-radix-tree", gdb.COMMAND_DATA, + gdb.COMPLETE_NONE) + + def invoke(self, argument, from_tty): + args = gdb.string_to_argv(argument) + if len(args) != 1: + raise gdb.GdbError("Usage: lx-radix-tree ROOT") + root = gdb.parse_and_eval(args[0]) + for index, slot in for_each_slot(root): + gdb.write("[{}] = {}\n".format(index, slot.dereference())) + LxRadixTree() +LxRadixTreeLookup() -- cgit v1.2.3 From 581ee79a254759ea8288057f762389820b39adcc Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 6 Nov 2025 13:43:42 +0100 Subject: scripts/gdb/symbols: make BPF debug info available to GDB One can debug BPF programs with QEMU gdbstub by setting a breakpoint on bpf_prog_kallsyms_add(), waiting for a hit with a matching aux.name, and then setting a breakpoint on bpf_func. This is tedious, error-prone, and also lacks line numbers. Automate this in a way similar to the existing support for modules in lx-symbols. Enumerate and monitor changes to both BPF kallsyms and JITed progs. For each ksym, generate and compile a synthetic .s file containing the name, code, and size. In addition, if this ksym is also a prog, and not a trampoline, add line number information. Ensure that this is a no-op if the kernel is built without BPF support or if "as" is missing. In theory the "as" dependency may be dropped by generating the synthetic .o file manually, but this is too much complexity for too little benefit. Now one can debug BPF progs out of the box like this: (gdb) lx-symbols -bpf (gdb) b bpf_prog_4e612a6a881a086b_arena_list_add Breakpoint 2 (bpf_prog_4e612a6a881a086b_arena_list_add) pending. # ./test_progs -t arena_list Thread 4 hit Breakpoint 2, bpf_prog_4e612a6a881a086b_arena_list_add () at linux/tools/testing/selftests/bpf/progs/arena_list.c:51 51 list_head = &global_head; (gdb) n bpf_prog_4e612a6a881a086b_arena_list_add () at linux/tools/testing/selftests/bpf/progs/arena_list.c:53 53 for (i = zero; i < cnt && can_loop; i++) { This also works for subprogs. Link: https://lkml.kernel.org/r/20251106124600.86736-3-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Cc: Alexander Gordeev Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkman Cc: Heiko Carstens Cc: Jan Kiszka Cc: Kieran Bingham Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- scripts/gdb/linux/bpf.py | 253 ++++++++++++++++++++++++++++++++++++++ scripts/gdb/linux/constants.py.in | 3 + scripts/gdb/linux/symbols.py | 105 ++++++++++++++-- 3 files changed, 349 insertions(+), 12 deletions(-) create mode 100644 scripts/gdb/linux/bpf.py (limited to 'scripts') diff --git a/scripts/gdb/linux/bpf.py b/scripts/gdb/linux/bpf.py new file mode 100644 index 000000000000..1870534ef6f9 --- /dev/null +++ b/scripts/gdb/linux/bpf.py @@ -0,0 +1,253 @@ +# SPDX-License-Identifier: GPL-2.0 + +import json +import subprocess +import tempfile + +import gdb + +from linux import constants, lists, radixtree, utils + + +if constants.LX_CONFIG_BPF and constants.LX_CONFIG_BPF_JIT: + bpf_ksym_type = utils.CachedType("struct bpf_ksym") +if constants.LX_CONFIG_BPF_SYSCALL: + bpf_prog_type = utils.CachedType("struct bpf_prog") + + +def get_ksym_name(ksym): + name = ksym["name"].bytes + end = name.find(b"\x00") + if end != -1: + name = name[:end] + return name.decode() + + +def list_ksyms(): + if not (constants.LX_CONFIG_BPF and constants.LX_CONFIG_BPF_JIT): + return [] + bpf_kallsyms = gdb.parse_and_eval("&bpf_kallsyms") + bpf_ksym_ptr_type = bpf_ksym_type.get_type().pointer() + return list(lists.list_for_each_entry(bpf_kallsyms, + bpf_ksym_ptr_type, + "lnode")) + + +class KsymAddBreakpoint(gdb.Breakpoint): + def __init__(self, monitor): + super(KsymAddBreakpoint, self).__init__("bpf_ksym_add", internal=True) + self.silent = True + self.monitor = monitor + + def stop(self): + self.monitor.add(gdb.parse_and_eval("ksym")) + return False + + +class KsymRemoveBreakpoint(gdb.Breakpoint): + def __init__(self, monitor): + super(KsymRemoveBreakpoint, self).__init__("bpf_ksym_del", + internal=True) + self.silent = True + self.monitor = monitor + + def stop(self): + self.monitor.remove(gdb.parse_and_eval("ksym")) + return False + + +class KsymMonitor: + def __init__(self, add, remove): + self.add = add + self.remove = remove + + self.add_bp = KsymAddBreakpoint(self) + self.remove_bp = KsymRemoveBreakpoint(self) + + self.notify_initial() + + def notify_initial(self): + for ksym in list_ksyms(): + self.add(ksym) + + def delete(self): + self.add_bp.delete() + self.remove_bp.delete() + + +def list_progs(): + if not constants.LX_CONFIG_BPF_SYSCALL: + return [] + idr_rt = gdb.parse_and_eval("&prog_idr.idr_rt") + bpf_prog_ptr_type = bpf_prog_type.get_type().pointer() + progs = [] + for _, slot in radixtree.for_each_slot(idr_rt): + prog = slot.dereference().cast(bpf_prog_ptr_type) + progs.append(prog) + # Subprogs are not registered in prog_idr, fetch them manually. + # func[0] is the current prog. + aux = prog["aux"] + func = aux["func"] + real_func_cnt = int(aux["real_func_cnt"]) + for i in range(1, real_func_cnt): + progs.append(func[i]) + return progs + + +class ProgAddBreakpoint(gdb.Breakpoint): + def __init__(self, monitor): + super(ProgAddBreakpoint, self).__init__("bpf_prog_kallsyms_add", + internal=True) + self.silent = True + self.monitor = monitor + + def stop(self): + self.monitor.add(gdb.parse_and_eval("fp")) + return False + + +class ProgRemoveBreakpoint(gdb.Breakpoint): + def __init__(self, monitor): + super(ProgRemoveBreakpoint, self).__init__("bpf_prog_free_id", + internal=True) + self.silent = True + self.monitor = monitor + + def stop(self): + self.monitor.remove(gdb.parse_and_eval("prog")) + return False + + +class ProgMonitor: + def __init__(self, add, remove): + self.add = add + self.remove = remove + + self.add_bp = ProgAddBreakpoint(self) + self.remove_bp = ProgRemoveBreakpoint(self) + + self.notify_initial() + + def notify_initial(self): + for prog in list_progs(): + self.add(prog) + + def delete(self): + self.add_bp.delete() + self.remove_bp.delete() + + +def btf_str_by_offset(btf, offset): + while offset < btf["start_str_off"]: + btf = btf["base_btf"] + + offset -= btf["start_str_off"] + if offset < btf["hdr"]["str_len"]: + return (btf["strings"] + offset).string() + + return None + + +def bpf_line_info_line_num(line_col): + return line_col >> 10 + + +def bpf_line_info_line_col(line_col): + return line_col & 0x3ff + + +class LInfoIter: + def __init__(self, prog): + # See bpf_prog_get_file_line() for details. + self.pos = 0 + self.nr_linfo = 0 + + if prog is None: + return + + self.bpf_func = int(prog["bpf_func"]) + aux = prog["aux"] + self.btf = aux["btf"] + linfo_idx = aux["linfo_idx"] + self.nr_linfo = int(aux["nr_linfo"]) - linfo_idx + if self.nr_linfo == 0: + return + + linfo_ptr = aux["linfo"] + tpe = linfo_ptr.type.target().array(self.nr_linfo).pointer() + self.linfo = (linfo_ptr + linfo_idx).cast(tpe).dereference() + jited_linfo_ptr = aux["jited_linfo"] + tpe = jited_linfo_ptr.type.target().array(self.nr_linfo).pointer() + self.jited_linfo = (jited_linfo_ptr + linfo_idx).cast(tpe).dereference() + + self.filenos = {} + + def get_code_off(self): + if self.pos >= self.nr_linfo: + return -1 + return self.jited_linfo[self.pos] - self.bpf_func + + def advance(self): + self.pos += 1 + + def get_fileno(self): + file_name_off = int(self.linfo[self.pos]["file_name_off"]) + fileno = self.filenos.get(file_name_off) + if fileno is not None: + return fileno, None + file_name = btf_str_by_offset(self.btf, file_name_off) + fileno = len(self.filenos) + 1 + self.filenos[file_name_off] = fileno + return fileno, file_name + + def get_line_col(self): + line_col = int(self.linfo[self.pos]["line_col"]) + return bpf_line_info_line_num(line_col), \ + bpf_line_info_line_col(line_col) + + +def generate_debug_obj(ksym, prog): + name = get_ksym_name(ksym) + # Avoid read_memory(); it throws bogus gdb.MemoryError in some contexts. + start = ksym["start"] + code = start.cast(gdb.lookup_type("unsigned char") + .array(int(ksym["end"]) - int(start)) + .pointer()).dereference().bytes + linfo_iter = LInfoIter(prog) + + result = tempfile.NamedTemporaryFile(suffix=".o", mode="wb") + try: + with tempfile.NamedTemporaryFile(suffix=".s", mode="w") as src: + # ".loc" does not apply to ".byte"s, only to ".insn"s, but since + # this needs to work for all architectures, the latter are not an + # option. Ask the assembler to apply ".loc"s to labels as well, + # and generate dummy labels after each ".loc". + src.write(".loc_mark_labels 1\n") + + src.write(".globl {}\n".format(name)) + src.write(".type {},@function\n".format(name)) + src.write("{}:\n".format(name)) + for code_off, code_byte in enumerate(code): + if linfo_iter.get_code_off() == code_off: + fileno, file_name = linfo_iter.get_fileno() + if file_name is not None: + src.write(".file {} {}\n".format( + fileno, json.dumps(file_name))) + line, col = linfo_iter.get_line_col() + src.write(".loc {} {} {}\n".format(fileno, line, col)) + src.write("0:\n") + linfo_iter.advance() + src.write(".byte {}\n".format(code_byte)) + src.write(".size {},{}\n".format(name, len(code))) + src.flush() + + try: + subprocess.check_call(["as", "-c", src.name, "-o", result.name]) + except FileNotFoundError: + # "as" is not installed. + result.close() + return None + return result + except: + result.close() + raise diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index c3886739a028..6d475540c6ba 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -170,3 +170,6 @@ LX_CONFIG(CONFIG_PAGE_OWNER) LX_CONFIG(CONFIG_SLUB_DEBUG) LX_CONFIG(CONFIG_SLAB_FREELIST_HARDENED) LX_CONFIG(CONFIG_MMU) +LX_CONFIG(CONFIG_BPF) +LX_CONFIG(CONFIG_BPF_JIT) +LX_CONFIG(CONFIG_BPF_SYSCALL) diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 6edb99221675..d4308b726183 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -11,13 +11,14 @@ # This work is licensed under the terms of the GNU GPL version 2. # +import atexit import gdb import os import re import struct from itertools import count -from linux import modules, utils, constants +from linux import bpf, constants, modules, utils if hasattr(gdb, 'Breakpoint'): @@ -114,17 +115,27 @@ class LxSymbols(gdb.Command): The kernel (vmlinux) is taken from the current working directly. Modules (.ko) are scanned recursively, starting in the same directory. Optionally, the module search path can be extended by a space separated list of paths passed to the -lx-symbols command.""" +lx-symbols command. + +When the -bpf flag is specified, symbols from the currently loaded BPF programs +are loaded as well.""" module_paths = [] module_files = [] module_files_updated = False loaded_modules = [] breakpoint = None + bpf_prog_monitor = None + bpf_ksym_monitor = None + bpf_progs = {} + # The remove-symbol-file command, even when invoked with -a, requires the + # respective object file to exist, so keep them around. + bpf_debug_objs = {} def __init__(self): super(LxSymbols, self).__init__("lx-symbols", gdb.COMMAND_FILES, gdb.COMPLETE_FILENAME) + atexit.register(self.cleanup_bpf) def _update_module_files(self): self.module_files = [] @@ -197,6 +208,51 @@ lx-symbols command.""" else: gdb.write("no module object found for '{0}'\n".format(module_name)) + def add_bpf_prog(self, prog): + if prog["jited"]: + self.bpf_progs[int(prog["bpf_func"])] = prog + + def remove_bpf_prog(self, prog): + self.bpf_progs.pop(int(prog["bpf_func"]), None) + + def add_bpf_ksym(self, ksym): + addr = int(ksym["start"]) + name = bpf.get_ksym_name(ksym) + with utils.pagination_off(): + gdb.write("loading @{addr}: {name}\n".format( + addr=hex(addr), name=name)) + debug_obj = bpf.generate_debug_obj(ksym, self.bpf_progs.get(addr)) + if debug_obj is None: + return + try: + cmdline = "add-symbol-file {obj} {addr}".format( + obj=debug_obj.name, addr=hex(addr)) + gdb.execute(cmdline, to_string=True) + except: + debug_obj.close() + raise + self.bpf_debug_objs[addr] = debug_obj + + def remove_bpf_ksym(self, ksym): + addr = int(ksym["start"]) + debug_obj = self.bpf_debug_objs.pop(addr, None) + if debug_obj is None: + return + try: + name = bpf.get_ksym_name(ksym) + gdb.write("unloading @{addr}: {name}\n".format( + addr=hex(addr), name=name)) + cmdline = "remove-symbol-file {path}".format(path=debug_obj.name) + gdb.execute(cmdline, to_string=True) + finally: + debug_obj.close() + + def cleanup_bpf(self): + self.bpf_progs = {} + while len(self.bpf_debug_objs) > 0: + self.bpf_debug_objs.popitem()[1].close() + + def load_all_symbols(self): gdb.write("loading vmlinux\n") @@ -224,34 +280,59 @@ lx-symbols command.""" else: [self.load_module_symbols(module) for module in module_list] + self.cleanup_bpf() + if self.bpf_prog_monitor is not None: + self.bpf_prog_monitor.notify_initial() + if self.bpf_ksym_monitor is not None: + self.bpf_ksym_monitor.notify_initial() + for saved_state in saved_states: saved_state['breakpoint'].enabled = saved_state['enabled'] def invoke(self, arg, from_tty): skip_decompressor() - self.module_paths = [os.path.abspath(os.path.expanduser(p)) - for p in arg.split()] + monitor_bpf = False + self.module_paths = [] + for p in arg.split(): + if p == "-bpf": + monitor_bpf = True + else: + p.append(os.path.abspath(os.path.expanduser(p))) self.module_paths.append(os.getcwd()) + if self.breakpoint is not None: + self.breakpoint.delete() + self.breakpoint = None + if self.bpf_prog_monitor is not None: + self.bpf_prog_monitor.delete() + self.bpf_prog_monitor = None + if self.bpf_ksym_monitor is not None: + self.bpf_ksym_monitor.delete() + self.bpf_ksym_monitor = None + # enforce update self.module_files = [] self.module_files_updated = False self.load_all_symbols() - if not modules.has_modules(): + if not hasattr(gdb, 'Breakpoint'): + gdb.write("Note: symbol update on module and BPF loading not " + "supported with this gdb version\n") return - if hasattr(gdb, 'Breakpoint'): - if self.breakpoint is not None: - self.breakpoint.delete() - self.breakpoint = None + if modules.has_modules(): self.breakpoint = LoadModuleBreakpoint( "kernel/module/main.c:do_init_module", self) - else: - gdb.write("Note: symbol update on module loading not supported " - "with this gdb version\n") + + if monitor_bpf: + if constants.LX_CONFIG_BPF_SYSCALL: + self.bpf_prog_monitor = bpf.ProgMonitor(self.add_bpf_prog, + self.remove_bpf_prog) + if constants.LX_CONFIG_BPF and constants.LX_CONFIG_BPF_JIT: + self.bpf_ksym_monitor = bpf.KsymMonitor(self.add_bpf_ksym, + self.remove_bpf_ksym) LxSymbols() -- cgit v1.2.3 From 93863f3f859a626347ce2ec18947b11357b4ca14 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 20 Nov 2025 12:14:20 -0800 Subject: kbuild: Check for functions with ambiguous -ffunction-sections section names Commit 9c7dc1dd897a ("objtool: Warn on functions with ambiguous -ffunction-sections section names") only works for drivers which are compiled on architectures supported by objtool. Make a script to perform the same check for all architectures. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/a6a49644a34964f7e02f3a8ce43af03e72817180.1763669451.git.jpoimboe@kernel.org --- include/asm-generic/vmlinux.lds.h | 2 +- scripts/Makefile.vmlinux_o | 4 ++++ scripts/check-function-names.sh | 25 +++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100755 scripts/check-function-names.sh (limited to 'scripts') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5efe1de2209b..0cdae6f809b5 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -110,7 +110,7 @@ * .text.startup could be __attribute__((constructor)) code in a *non* * ffunction-sections object, which should be placed in .init.text; or it could * be an actual function named startup() in an ffunction-sections object, which - * should be placed in .text. Objtool will detect and complain about any such + * should be placed in .text. The build will detect and complain about any such * ambiguously named functions. */ #define TEXT_MAIN \ diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 20533cc0b1ee..527352c222ff 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -63,11 +63,15 @@ quiet_cmd_ld_vmlinux.o = LD $@ --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ $(cmd_objtool) +cmd_check_function_names = $(srctree)/scripts/check-function-names.sh $@ + define rule_ld_vmlinux.o $(call cmd_and_savecmd,ld_vmlinux.o) $(call cmd,gen_objtooldep) + $(call cmd,check_function_names) endef + vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE $(call if_changed_rule,ld_vmlinux.o) diff --git a/scripts/check-function-names.sh b/scripts/check-function-names.sh new file mode 100755 index 000000000000..410042591cfc --- /dev/null +++ b/scripts/check-function-names.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Certain function names are disallowed due to section name ambiguities +# introduced by -ffunction-sections. +# +# See the comment above TEXT_MAIN in include/asm-generic/vmlinux.lds.h. + +objfile="$1" + +if [ ! -f "$objfile" ]; then + echo "usage: $0 " >&2 + exit 1 +fi + +bad_symbols=$(nm "$objfile" | awk '$2 ~ /^[TtWw]$/ {print $3}' | grep -E '^(startup|exit|split|unlikely|hot|unknown)(\.|$)') + +if [ -n "$bad_symbols" ]; then + echo "$bad_symbols" | while read -r sym; do + echo "$objfile: error: $sym() function name creates ambiguity with -ffunction-sections" >&2 + done + exit 1 +fi + +exit 0 -- cgit v1.2.3 From 1181c974421818ff7318e3a211c87b5dd437c13e Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:14 +0100 Subject: rust: kbuild: simplify `--cfg` handling We need to handle `cfg`s in both `rustc` and `rust-analyzer`, and in future commits some of those contain double quotes, which complicates things further. Thus, instead of removing the `--cfg ` part in the rust-analyzer generation script, have the `*-cfgs` variables contain just the actual `cfg`, and use that to generate the actual flags in `*-flags`. Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Tested-by: Gary Guo Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-3-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 6 ++++-- scripts/generate_rust_analyzer.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/rust/Makefile b/rust/Makefile index ce1853a09d3d..9967f3457d44 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -60,8 +60,10 @@ rustdoc_test_quiet=--test-args -q rustdoc_test_kernel_quiet=>/dev/null endif +cfgs-to-flags = $(patsubst %,--cfg='%',$1) + core-cfgs := \ - --cfg no_fp_fmt_parse + no_fp_fmt_parse core-edition := $(if $(call rustc-min-version,108700),2024,2021) @@ -72,7 +74,7 @@ core-skip_flags := \ core-flags := \ --edition=$(core-edition) \ - $(core-cfgs) + $(call cfgs-to-flags,$(core-cfgs)) # `rustdoc` did not save the target modifiers, thus workaround for # the time being (https://github.com/rust-lang/rust/issues/144521). diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index fc27f0cca752..dedca470adc1 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -15,7 +15,7 @@ def args_crates_cfgs(cfgs): crates_cfgs = {} for cfg in cfgs: crate, vals = cfg.split("=", 1) - crates_cfgs[crate] = vals.replace("--cfg", "").split() + crates_cfgs[crate] = vals.split() return crates_cfgs -- cgit v1.2.3 From 158a3b72118a4dab7e7bf2d89afbab9b96eddc1c Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:22 +0100 Subject: rust: proc-macro2: enable support in kbuild With all the new files in place and ready from the new crate, enable the support for it in the build system. `proc_macro_byte_character` and `proc_macro_c_str_literals` were stabilized in Rust 1.79.0 [1] and were implemented earlier than our minimum Rust version (1.78) [2][3]. Thus just enable them instead of using the `cfg` that `proc-macro2` uses to emulate them in older compilers. In addition, skip formatting for this vendored crate and take the chance to add a comment mentioning this. Link: https://github.com/rust-lang/rust/pull/123431 [1] Link: https://github.com/rust-lang/rust/pull/112711 [2] Link: https://github.com/rust-lang/rust/pull/119651 [3] Reviewed-by: Gary Guo Tested-by: Gary Guo Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-11-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Makefile | 5 +++++ rust/Makefile | 32 ++++++++++++++++++++++++++++++-- scripts/generate_rust_analyzer.py | 7 +++++++ 3 files changed, 42 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/Makefile b/Makefile index d14824792227..589bcfe3bf75 100644 --- a/Makefile +++ b/Makefile @@ -1826,10 +1826,15 @@ rusttest: prepare $(Q)$(MAKE) $(build)=rust $@ # Formatting targets +# +# Generated files as well as vendored crates are skipped. PHONY += rustfmt rustfmtcheck rustfmt: $(Q)find $(srctree) $(RCS_FIND_IGNORE) \ + \( \ + -path $(srctree)/rust/proc-macro2 \ + \) -prune -o \ -type f -a -name '*.rs' -a ! -name '*generated*' -print \ | xargs $(RUSTFMT) $(rustfmt_flags) diff --git a/rust/Makefile b/rust/Makefile index 0288ca8d270c..7dd1261ad98f 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -27,6 +27,8 @@ endif obj-$(CONFIG_RUST) += exports.o +always-$(CONFIG_RUST) += libproc_macro2.rlib + always-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated.rs always-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated_kunit.c @@ -76,6 +78,17 @@ core-flags := \ --edition=$(core-edition) \ $(call cfgs-to-flags,$(core-cfgs)) +proc_macro2-cfgs := \ + feature="proc-macro" \ + wrap_proc_macro \ + $(if $(call rustc-min-version,108800),proc_macro_span_file proc_macro_span_location) + +# Stable since Rust 1.79.0: `feature(proc_macro_byte_character,proc_macro_c_str_literals)`. +proc_macro2-flags := \ + --cap-lints=allow \ + -Zcrate-attr='feature(proc_macro_byte_character,proc_macro_c_str_literals)' \ + $(call cfgs-to-flags,$(proc_macro2-cfgs)) + # `rustdoc` did not save the target modifiers, thus workaround for # the time being (https://github.com/rust-lang/rust/issues/144521). rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18) @@ -125,10 +138,15 @@ rustdoc: rustdoc-core rustdoc-macros rustdoc-compiler_builtins \ $(Q)for f in $(rustdoc_output)/static.files/rustdoc-*.css; do \ echo ".logo-container > img { object-fit: contain; }" >> $$f; done +rustdoc-proc_macro2: private rustdoc_host = yes +rustdoc-proc_macro2: private rustc_target_flags = $(proc_macro2-flags) +rustdoc-proc_macro2: $(src)/proc-macro2/lib.rs rustdoc-clean FORCE + +$(call if_changed,rustdoc) + rustdoc-macros: private rustdoc_host = yes rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ --extern proc_macro -rustdoc-macros: $(src)/macros/lib.rs rustdoc-clean FORCE +rustdoc-macros: $(src)/macros/lib.rs rustdoc-clean rustdoc-proc_macro2 FORCE +$(call if_changed,rustdoc) # Starting with Rust 1.82.0, skipping `-Wrustdoc::unescaped_backticks` should @@ -185,6 +203,10 @@ rusttestlib-build_error: $(src)/build_error.rs FORCE rusttestlib-ffi: $(src)/ffi.rs FORCE +$(call if_changed,rustc_test_library) +rusttestlib-proc_macro2: private rustc_target_flags = $(proc_macro2-flags) +rusttestlib-proc_macro2: $(src)/proc-macro2/lib.rs FORCE + +$(call if_changed,rustc_test_library) + rusttestlib-macros: private rustc_target_flags = --extern proc_macro rusttestlib-macros: private rustc_test_library_proc = yes rusttestlib-macros: $(src)/macros/lib.rs FORCE @@ -431,6 +453,11 @@ quiet_cmd_rustc_procmacrolibrary = $(RUSTC_OR_CLIPPY_QUIET) PL $@ mv $(objtree)/$(obj)/$(patsubst lib%.rlib,%,$(notdir $@)).d $(depfile); \ sed -i '/^\#/d' $(depfile) +$(obj)/libproc_macro2.rlib: private skip_clippy = 1 +$(obj)/libproc_macro2.rlib: private rustc_target_flags = $(proc_macro2-flags) +$(obj)/libproc_macro2.rlib: $(src)/proc-macro2/lib.rs FORCE + +$(call if_changed_dep,rustc_procmacrolibrary) + quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ cmd_rustc_procmacro = \ $(RUSTC_OR_CLIPPY) $(rust_common_flags) $(rustc_target_flags) \ @@ -442,7 +469,7 @@ quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ @$(objtree)/include/generated/rustc_cfg $< # Procedural macros can only be used with the `rustc` that compiled it. -$(obj)/$(libmacros_name): $(src)/macros/lib.rs FORCE +$(obj)/$(libmacros_name): $(src)/macros/lib.rs $(obj)/libproc_macro2.rlib FORCE +$(call if_changed_dep,rustc_procmacro) $(obj)/$(libpin_init_internal_name): private rustc_target_flags = --cfg kernel @@ -465,6 +492,7 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L rust-analyzer: $(Q)MAKEFLAGS= $(srctree)/scripts/generate_rust_analyzer.py \ --cfgs='core=$(core-cfgs)' $(core-edition) \ + --cfgs='proc_macro2=$(proc_macro2-cfgs)' \ $(realpath $(srctree)) $(realpath $(objtree)) \ $(rustc_sysroot) $(RUST_LIB_SRC) $(if $(KBUILD_EXTMOD),$(srcroot)) \ > rust-project.json diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index dedca470adc1..00c6b7cc94b7 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -86,6 +86,13 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit [], ) + append_crate( + "proc_macro2", + srctree / "rust" / "proc-macro2" / "lib.rs", + ["core", "alloc", "std", "proc_macro"], + cfg=crates_cfgs["proc_macro2"], + ) + append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", -- cgit v1.2.3 From 88de91cc1ce7b3069ccabc1a5fbe16d41c663093 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:26 +0100 Subject: rust: quote: enable support in kbuild With all the new files in place and ready from the new crate, enable the support for it in the build system. Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Tested-by: Gary Guo Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-15-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/Makefile | 38 +++++++++++++++++++++++++++++++++++--- scripts/generate_rust_analyzer.py | 7 +++++++ 3 files changed, 43 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/Makefile b/Makefile index 589bcfe3bf75..85da055e4f00 100644 --- a/Makefile +++ b/Makefile @@ -1834,6 +1834,7 @@ rustfmt: $(Q)find $(srctree) $(RCS_FIND_IGNORE) \ \( \ -path $(srctree)/rust/proc-macro2 \ + -o -path $(srctree)/rust/quote \ \) -prune -o \ -type f -a -name '*.rs' -a ! -name '*generated*' -print \ | xargs $(RUSTFMT) $(rustfmt_flags) diff --git a/rust/Makefile b/rust/Makefile index 7dd1261ad98f..4f4a00594142 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -27,7 +27,7 @@ endif obj-$(CONFIG_RUST) += exports.o -always-$(CONFIG_RUST) += libproc_macro2.rlib +always-$(CONFIG_RUST) += libproc_macro2.rlib libquote.rlib always-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated.rs always-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated_kunit.c @@ -89,6 +89,18 @@ proc_macro2-flags := \ -Zcrate-attr='feature(proc_macro_byte_character,proc_macro_c_str_literals)' \ $(call cfgs-to-flags,$(proc_macro2-cfgs)) +quote-cfgs := \ + feature="proc-macro" + +quote-skip_flags := \ + --edition=2021 + +quote-flags := \ + --edition=2018 \ + --cap-lints=allow \ + --extern proc_macro2 \ + $(call cfgs-to-flags,$(quote-cfgs)) + # `rustdoc` did not save the target modifiers, thus workaround for # the time being (https://github.com/rust-lang/rust/issues/144521). rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18) @@ -143,10 +155,17 @@ rustdoc-proc_macro2: private rustc_target_flags = $(proc_macro2-flags) rustdoc-proc_macro2: $(src)/proc-macro2/lib.rs rustdoc-clean FORCE +$(call if_changed,rustdoc) +rustdoc-quote: private rustdoc_host = yes +rustdoc-quote: private rustc_target_flags = $(quote-flags) +rustdoc-quote: private skip_flags = $(quote-skip_flags) +rustdoc-quote: $(src)/quote/lib.rs rustdoc-clean rustdoc-proc_macro2 FORCE + +$(call if_changed,rustdoc) + rustdoc-macros: private rustdoc_host = yes rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ --extern proc_macro -rustdoc-macros: $(src)/macros/lib.rs rustdoc-clean rustdoc-proc_macro2 FORCE +rustdoc-macros: $(src)/macros/lib.rs rustdoc-clean rustdoc-proc_macro2 \ + rustdoc-quote FORCE +$(call if_changed,rustdoc) # Starting with Rust 1.82.0, skipping `-Wrustdoc::unescaped_backticks` should @@ -207,6 +226,11 @@ rusttestlib-proc_macro2: private rustc_target_flags = $(proc_macro2-flags) rusttestlib-proc_macro2: $(src)/proc-macro2/lib.rs FORCE +$(call if_changed,rustc_test_library) +rusttestlib-quote: private skip_flags = $(quote-skip_flags) +rusttestlib-quote: private rustc_target_flags = $(quote-flags) +rusttestlib-quote: $(src)/quote/lib.rs rusttestlib-proc_macro2 FORCE + +$(call if_changed,rustc_test_library) + rusttestlib-macros: private rustc_target_flags = --extern proc_macro rusttestlib-macros: private rustc_test_library_proc = yes rusttestlib-macros: $(src)/macros/lib.rs FORCE @@ -458,6 +482,12 @@ $(obj)/libproc_macro2.rlib: private rustc_target_flags = $(proc_macro2-flags) $(obj)/libproc_macro2.rlib: $(src)/proc-macro2/lib.rs FORCE +$(call if_changed_dep,rustc_procmacrolibrary) +$(obj)/libquote.rlib: private skip_clippy = 1 +$(obj)/libquote.rlib: private skip_flags = $(quote-skip_flags) +$(obj)/libquote.rlib: private rustc_target_flags = $(quote-flags) +$(obj)/libquote.rlib: $(src)/quote/lib.rs $(obj)/libproc_macro2.rlib FORCE + +$(call if_changed_dep,rustc_procmacrolibrary) + quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ cmd_rustc_procmacro = \ $(RUSTC_OR_CLIPPY) $(rust_common_flags) $(rustc_target_flags) \ @@ -469,7 +499,8 @@ quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ @$(objtree)/include/generated/rustc_cfg $< # Procedural macros can only be used with the `rustc` that compiled it. -$(obj)/$(libmacros_name): $(src)/macros/lib.rs $(obj)/libproc_macro2.rlib FORCE +$(obj)/$(libmacros_name): $(src)/macros/lib.rs $(obj)/libproc_macro2.rlib \ + $(obj)/libquote.rlib FORCE +$(call if_changed_dep,rustc_procmacro) $(obj)/$(libpin_init_internal_name): private rustc_target_flags = --cfg kernel @@ -493,6 +524,7 @@ rust-analyzer: $(Q)MAKEFLAGS= $(srctree)/scripts/generate_rust_analyzer.py \ --cfgs='core=$(core-cfgs)' $(core-edition) \ --cfgs='proc_macro2=$(proc_macro2-cfgs)' \ + --cfgs='quote=$(quote-cfgs)' \ $(realpath $(srctree)) $(realpath $(objtree)) \ $(rustc_sysroot) $(RUST_LIB_SRC) $(if $(KBUILD_EXTMOD),$(srcroot)) \ > rust-project.json diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 00c6b7cc94b7..4faf153ed2ee 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -93,6 +93,13 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit cfg=crates_cfgs["proc_macro2"], ) + append_crate( + "quote", + srctree / "rust" / "quote" / "lib.rs", + ["alloc", "proc_macro", "proc_macro2"], + cfg=crates_cfgs["quote"], + ) + append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", -- cgit v1.2.3 From 737401751ace2d806de6854aee52c176141d10e2 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:31 +0100 Subject: rust: syn: enable support in kbuild With all the new files in place and ready from the new crate, enable the support for it in the build system. Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Tested-by: Gary Guo Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-20-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/Makefile | 37 ++++++++++++++++++++++++++++++++++--- scripts/generate_rust_analyzer.py | 7 +++++++ 3 files changed, 42 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/Makefile b/Makefile index 85da055e4f00..96ddbaae7e12 100644 --- a/Makefile +++ b/Makefile @@ -1835,6 +1835,7 @@ rustfmt: \( \ -path $(srctree)/rust/proc-macro2 \ -o -path $(srctree)/rust/quote \ + -o -path $(srctree)/rust/syn \ \) -prune -o \ -type f -a -name '*.rs' -a ! -name '*generated*' -print \ | xargs $(RUSTFMT) $(rustfmt_flags) diff --git a/rust/Makefile b/rust/Makefile index 4f4a00594142..8988ecf32531 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -27,7 +27,7 @@ endif obj-$(CONFIG_RUST) += exports.o -always-$(CONFIG_RUST) += libproc_macro2.rlib libquote.rlib +always-$(CONFIG_RUST) += libproc_macro2.rlib libquote.rlib libsyn.rlib always-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated.rs always-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated_kunit.c @@ -101,6 +101,22 @@ quote-flags := \ --extern proc_macro2 \ $(call cfgs-to-flags,$(quote-cfgs)) +# `extra-traits`, `fold` and `visit` may be enabled if needed. +syn-cfgs := \ + feature="clone-impls" \ + feature="derive" \ + feature="full" \ + feature="parsing" \ + feature="printing" \ + feature="proc-macro" \ + feature="visit-mut" + +syn-flags := \ + --cap-lints=allow \ + --extern proc_macro2 \ + --extern quote \ + $(call cfgs-to-flags,$(syn-cfgs)) + # `rustdoc` did not save the target modifiers, thus workaround for # the time being (https://github.com/rust-lang/rust/issues/144521). rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18) @@ -161,11 +177,16 @@ rustdoc-quote: private skip_flags = $(quote-skip_flags) rustdoc-quote: $(src)/quote/lib.rs rustdoc-clean rustdoc-proc_macro2 FORCE +$(call if_changed,rustdoc) +rustdoc-syn: private rustdoc_host = yes +rustdoc-syn: private rustc_target_flags = $(syn-flags) +rustdoc-syn: $(src)/syn/lib.rs rustdoc-clean rustdoc-quote FORCE + +$(call if_changed,rustdoc) + rustdoc-macros: private rustdoc_host = yes rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ --extern proc_macro rustdoc-macros: $(src)/macros/lib.rs rustdoc-clean rustdoc-proc_macro2 \ - rustdoc-quote FORCE + rustdoc-quote rustdoc-syn FORCE +$(call if_changed,rustdoc) # Starting with Rust 1.82.0, skipping `-Wrustdoc::unescaped_backticks` should @@ -231,6 +252,10 @@ rusttestlib-quote: private rustc_target_flags = $(quote-flags) rusttestlib-quote: $(src)/quote/lib.rs rusttestlib-proc_macro2 FORCE +$(call if_changed,rustc_test_library) +rusttestlib-syn: private rustc_target_flags = $(syn-flags) +rusttestlib-syn: $(src)/syn/lib.rs rusttestlib-quote FORCE + +$(call if_changed,rustc_test_library) + rusttestlib-macros: private rustc_target_flags = --extern proc_macro rusttestlib-macros: private rustc_test_library_proc = yes rusttestlib-macros: $(src)/macros/lib.rs FORCE @@ -488,6 +513,11 @@ $(obj)/libquote.rlib: private rustc_target_flags = $(quote-flags) $(obj)/libquote.rlib: $(src)/quote/lib.rs $(obj)/libproc_macro2.rlib FORCE +$(call if_changed_dep,rustc_procmacrolibrary) +$(obj)/libsyn.rlib: private skip_clippy = 1 +$(obj)/libsyn.rlib: private rustc_target_flags = $(syn-flags) +$(obj)/libsyn.rlib: $(src)/syn/lib.rs $(obj)/libquote.rlib FORCE + +$(call if_changed_dep,rustc_procmacrolibrary) + quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ cmd_rustc_procmacro = \ $(RUSTC_OR_CLIPPY) $(rust_common_flags) $(rustc_target_flags) \ @@ -500,7 +530,7 @@ quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ # Procedural macros can only be used with the `rustc` that compiled it. $(obj)/$(libmacros_name): $(src)/macros/lib.rs $(obj)/libproc_macro2.rlib \ - $(obj)/libquote.rlib FORCE + $(obj)/libquote.rlib $(obj)/libsyn.rlib FORCE +$(call if_changed_dep,rustc_procmacro) $(obj)/$(libpin_init_internal_name): private rustc_target_flags = --cfg kernel @@ -525,6 +555,7 @@ rust-analyzer: --cfgs='core=$(core-cfgs)' $(core-edition) \ --cfgs='proc_macro2=$(proc_macro2-cfgs)' \ --cfgs='quote=$(quote-cfgs)' \ + --cfgs='syn=$(syn-cfgs)' \ $(realpath $(srctree)) $(realpath $(objtree)) \ $(rustc_sysroot) $(RUST_LIB_SRC) $(if $(KBUILD_EXTMOD),$(srcroot)) \ > rust-project.json diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 4faf153ed2ee..5b6f7b8d6918 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -100,6 +100,13 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit cfg=crates_cfgs["quote"], ) + append_crate( + "syn", + srctree / "rust" / "syn" / "lib.rs", + ["proc_macro", "proc_macro2", "quote"], + cfg=crates_cfgs["syn"], + ) + append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", -- cgit v1.2.3 From 52ba807f1aa6ac16289e9dc9e381475305afd685 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:32 +0100 Subject: rust: macros: support `proc-macro2`, `quote` and `syn` One of the two main uses cases for adding `proc-macro2`, `quote` and `syn` is the `macros` crates (and the other `pin-init`). Thus add the support for the crates in `macros` already. Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-21-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 13 +++++++++---- scripts/generate_rust_analyzer.py | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'scripts') diff --git a/rust/Makefile b/rust/Makefile index 8988ecf32531..d7d19c21b671 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -184,7 +184,7 @@ rustdoc-syn: $(src)/syn/lib.rs rustdoc-clean rustdoc-quote FORCE rustdoc-macros: private rustdoc_host = yes rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ - --extern proc_macro + --extern proc_macro --extern proc_macro2 --extern quote --extern syn rustdoc-macros: $(src)/macros/lib.rs rustdoc-clean rustdoc-proc_macro2 \ rustdoc-quote rustdoc-syn FORCE +$(call if_changed,rustdoc) @@ -256,9 +256,11 @@ rusttestlib-syn: private rustc_target_flags = $(syn-flags) rusttestlib-syn: $(src)/syn/lib.rs rusttestlib-quote FORCE +$(call if_changed,rustc_test_library) -rusttestlib-macros: private rustc_target_flags = --extern proc_macro +rusttestlib-macros: private rustc_target_flags = --extern proc_macro \ + --extern proc_macro2 --extern quote --extern syn rusttestlib-macros: private rustc_test_library_proc = yes -rusttestlib-macros: $(src)/macros/lib.rs FORCE +rusttestlib-macros: $(src)/macros/lib.rs \ + rusttestlib-proc_macro2 rusttestlib-quote rusttestlib-syn FORCE +$(call if_changed,rustc_test_library) rusttestlib-pin_init_internal: private rustc_target_flags = --cfg kernel \ @@ -339,7 +341,8 @@ quiet_cmd_rustc_test = $(RUSTC_OR_CLIPPY_QUIET) T $< rusttest: rusttest-macros rusttest-macros: private rustc_target_flags = --extern proc_macro \ - --extern macros --extern kernel --extern pin_init + --extern macros --extern kernel --extern pin_init \ + --extern proc_macro2 --extern quote --extern syn rusttest-macros: private rustdoc_test_target_flags = --crate-type proc-macro rusttest-macros: $(src)/macros/lib.rs \ rusttestlib-macros rusttestlib-kernel rusttestlib-pin_init FORCE @@ -529,6 +532,8 @@ quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ @$(objtree)/include/generated/rustc_cfg $< # Procedural macros can only be used with the `rustc` that compiled it. +$(obj)/$(libmacros_name): private rustc_target_flags = \ + --extern proc_macro2 --extern quote --extern syn $(obj)/$(libmacros_name): $(src)/macros/lib.rs $(obj)/libproc_macro2.rlib \ $(obj)/libquote.rlib $(obj)/libsyn.rlib FORCE +$(call if_changed_dep,rustc_procmacro) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 5b6f7b8d6918..147d0cc94068 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -110,7 +110,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", - ["std", "proc_macro"], + ["std", "proc_macro", "proc_macro2", "quote", "syn"], is_proc_macro=True, ) -- cgit v1.2.3 From 2a9c8c0b59d366acabb8f891e84569376f3e2709 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 25 Nov 2025 14:18:20 +0100 Subject: kbuild: add target to build a cpio containing modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new package target to build a cpio archive containing the kernel modules. This is particularly useful to supplement an existing initramfs with the kernel modules so that the root filesystem can be started with all needed kernel modules without modifying it. Signed-off-by: Sascha Hauer Reviewed-by: Simon Glass Tested-by: Simon Glass Co-developed-by: Ahmad Fatoum Signed-off-by: Ahmad Fatoum Reviewed-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Tested-by: Nicolas Schier Link: https://patch.msgid.link/20251125-cpio-modules-pkg-v2-2-aa8277d89682@pengutronix.de Signed-off-by: Nicolas Schier --- scripts/Makefile.package | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'scripts') diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 74bcb9e7f7a4..83bfcf7cb09f 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -189,6 +189,25 @@ tar-pkg: linux-$(KERNELRELEASE)-$(ARCH).tar tar%-pkg: linux-$(KERNELRELEASE)-$(ARCH).tar.% FORCE @: +# modules-cpio-pkg - generate an initramfs with the modules +# --------------------------------------------------------------------------- + +.tmp_modules_cpio: FORCE + $(Q)$(MAKE) -f $(srctree)/Makefile + $(Q)rm -rf $@ + $(Q)$(MAKE) -f $(srctree)/Makefile INSTALL_MOD_PATH=$@ modules_install + +quiet_cmd_cpio = CPIO $@ + cmd_cpio = $(CONFIG_SHELL) $(srctree)/usr/gen_initramfs.sh -o $@ $< + +modules-$(KERNELRELEASE)-$(ARCH).cpio: .tmp_modules_cpio + $(Q)$(MAKE) $(build)=usr usr/gen_init_cpio + $(call cmd,cpio) + +PHONY += modules-cpio-pkg +modules-cpio-pkg: modules-$(KERNELRELEASE)-$(ARCH).cpio + @: + # perf-tar*-src-pkg - generate a source tarball with perf source # --------------------------------------------------------------------------- @@ -245,6 +264,7 @@ help: @echo ' tarbz2-pkg - Build the kernel as a bzip2 compressed tarball' @echo ' tarxz-pkg - Build the kernel as a xz compressed tarball' @echo ' tarzst-pkg - Build the kernel as a zstd compressed tarball' + @echo ' modules-cpio-pkg - Build the kernel modules as cpio archive' @echo ' perf-tar-src-pkg - Build the perf source tarball with no compression' @echo ' perf-targz-src-pkg - Build the perf source tarball with gzip compression' @echo ' perf-tarbz2-src-pkg - Build the perf source tarball with bz2 compression' -- cgit v1.2.3 From f387d0e1027f2d13cbfc1305b54198af701ede19 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Dec 2025 09:59:37 -0800 Subject: x86/asm: Remove ANNOTATE_DATA_SPECIAL usage Instead of manually annotating each __ex_table entry, just make the section mergeable and store the entry size in the ELF section header. Either way works for objtool create_fake_symbols(), this way produces cleaner code generation. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://patch.msgid.link/b858cb7891c1ba0080e22a9c32595e6c302435e2.1764694625.git.jpoimboe@kernel.org --- arch/um/include/asm/Kbuild | 1 - arch/um/include/shared/common-offsets.h | 1 + arch/x86/include/asm/asm.h | 25 ++++++++++++++----------- arch/x86/kernel/asm-offsets.c | 1 + arch/x86/um/shared/sysdep/kernel-offsets.h | 2 ++ kernel/bounds.c | 1 + scripts/mod/devicetable-offsets.c | 1 + 7 files changed, 20 insertions(+), 12 deletions(-) (limited to 'scripts') diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index b6810db24ca4..1b9b82bbe322 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += device.h generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h -generic-y += extable.h generic-y += ftrace.h generic-y += hw_irq.h generic-y += irq_regs.h diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 4e19103afd71..a6f77cb6aa7e 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -20,3 +20,4 @@ DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES); DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE); DEFINE(ALT_INSTR_SIZE, sizeof(struct alt_instr)); +DEFINE(EXTABLE_SIZE, sizeof(struct exception_table_entry)); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index bd62bd87a841..0e8c611bc9e2 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -126,18 +126,21 @@ static __always_inline __pure void *rip_rel_ptr(void *p) #ifdef __KERNEL__ +#ifndef COMPILE_OFFSETS +#include +#endif + # include /* Exception table entry */ #ifdef __ASSEMBLER__ -# define _ASM_EXTABLE_TYPE(from, to, type) \ - .pushsection "__ex_table","a" ; \ - .balign 4 ; \ - ANNOTATE_DATA_SPECIAL ; \ - .long (from) - . ; \ - .long (to) - . ; \ - .long type ; \ +# define _ASM_EXTABLE_TYPE(from, to, type) \ + .pushsection "__ex_table", "aM", @progbits, EXTABLE_SIZE ; \ + .balign 4 ; \ + .long (from) - . ; \ + .long (to) - . ; \ + .long type ; \ .popsection # ifdef CONFIG_KPROBES @@ -180,18 +183,18 @@ static __always_inline __pure void *rip_rel_ptr(void *p) ".purgem extable_type_reg\n" # define _ASM_EXTABLE_TYPE(from, to, type) \ - " .pushsection \"__ex_table\",\"a\"\n" \ + " .pushsection __ex_table, \"aM\", @progbits, " \ + __stringify(EXTABLE_SIZE) "\n" \ " .balign 4\n" \ - ANNOTATE_DATA_SPECIAL \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ " .long " __stringify(type) " \n" \ " .popsection\n" # define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \ - " .pushsection \"__ex_table\",\"a\"\n" \ + " .pushsection __ex_table, \"aM\", @progbits, " \ + __stringify(EXTABLE_SIZE) "\n" \ " .balign 4\n" \ - ANNOTATE_DATA_SPECIAL \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ DEFINE_EXTABLE_TYPE_REG \ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index db3bb5143329..25fcde525c68 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -126,4 +126,5 @@ static void __used common(void) BLANK(); DEFINE(ALT_INSTR_SIZE, sizeof(struct alt_instr)); + DEFINE(EXTABLE_SIZE, sizeof(struct exception_table_entry)); } diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 6fd1ed400399..8215a0200ddd 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define COMPILE_OFFSETS #include #include #include @@ -7,6 +8,7 @@ #include #include #include +#include /* workaround for a warning with -Wmissing-prototypes */ void foo(void); diff --git a/kernel/bounds.c b/kernel/bounds.c index 29b2cd00df2c..02b619eb6106 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -6,6 +6,7 @@ */ #define __GENERATING_BOUNDS_H +#define COMPILE_OFFSETS /* Include headers that define the enum constants of interest */ #include #include diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index d3d00e85edf7..ef2ffb68f69d 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define COMPILE_OFFSETS #include #include -- cgit v1.2.3 From 316f0b43fe0131af869a5a58e20ec6e0b6038fa8 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 2 Dec 2025 11:30:26 -0800 Subject: coccinelle: Drop pm_runtime_barrier() error code checks This function doesn't return anything any more, so the compiler would notice any bad error handling before any cocci script would. Signed-off-by: Brian Norris Tested-by: Guenter Roeck Link: https://patch.msgid.link/20251202193129.1411419-3-briannorris@chromium.org Signed-off-by: Rafael J. Wysocki --- scripts/coccinelle/api/pm_runtime.cocci | 1 - 1 file changed, 1 deletion(-) (limited to 'scripts') diff --git a/scripts/coccinelle/api/pm_runtime.cocci b/scripts/coccinelle/api/pm_runtime.cocci index 2c931e748dda..bf128ccae921 100644 --- a/scripts/coccinelle/api/pm_runtime.cocci +++ b/scripts/coccinelle/api/pm_runtime.cocci @@ -37,7 +37,6 @@ ret@p = \(pm_runtime_idle\| pm_runtime_put_sync_autosuspend\| pm_runtime_set_active\| pm_schedule_suspend\| - pm_runtime_barrier\| pm_generic_runtime_suspend\| pm_generic_runtime_resume\)(...); ... -- cgit v1.2.3 From 7a7e836684feb33d4f5418e8bd44101faf6b3f44 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 9 Dec 2025 20:40:23 -0500 Subject: tracing: Fix unused tracepoints when module uses only exported ones Building the KVM intel module failed to build with UT=1: no __tracepoint_strings in file: arch/x86/kvm/kvm-intel.o make[3]: *** [/work/git/test-linux.git/scripts/Makefile.modfinal:62: arch/x86/kvm/kvm-intel.ko] Error 1 The reason is that the module only uses the tracepoints defined and exported by the main kvm module. The tracepoint-update.c code fails the build if a tracepoint is used, but there's no tracepoints defined. But this is acceptable in modules if the tracepoints are defined in the vmlinux proper or another module and exported. Do not fail to build if a tracepoint is used but no tracepoints are defined if the code is a module. This should still never happen for the vmlinux itself. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Masahiro Yamada Link: https://patch.msgid.link/20251209204023.76941824@fedora Fixes: e30f8e61e2518 ("tracing: Add a tracepoint verification check at build time") Signed-off-by: Steven Rostedt (Google) --- scripts/tracepoint-update.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'scripts') diff --git a/scripts/tracepoint-update.c b/scripts/tracepoint-update.c index 7f7d90df14ce..90046aedc97b 100644 --- a/scripts/tracepoint-update.c +++ b/scripts/tracepoint-update.c @@ -210,6 +210,9 @@ static int process_tracepoints(bool mod, void *addr, const char *fname) } if (!tracepoint_data_sec) { + /* A module may reference only exported tracepoints */ + if (mod) + return 0; fprintf(stderr, "no __tracepoint_strings in file: %s\n", fname); return -1; } -- cgit v1.2.3 From 01da5216c572f6f8fca4e272451aad6c273b0d57 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Wed, 3 Dec 2025 20:58:49 +0530 Subject: checkpatch: add uninitialized pointer with __free attribute check Uinitialized pointers with __free attribute can cause undefined behavior as the memory randomly assigned to the pointer is freed automatically when the pointer goes out of scope. add check in checkpatch to detect such issues. Link: https://lkml.kernel.org/r/20251203-aheev-checkpatch-uninitialized-free-v7-1-841e3b31d8f3@gmail.com Signed-off-by: Ally Heev Suggested-by: Dan Carpenter Link: https://lore.kernel.org/all/8a4c0b43-cf63-400d-b33d-d9c447b7e0b9@suswa.mountain/ Link: https://lore.kernel.org/all/58fd478f408a34b578ee8d949c5c4b4da4d4f41d.camel@HansenPartnership.com/ Acked-by: Dan Williams Reviewed-by: Krzysztof Kozlowski Acked-by: Joe Perches Cc: Andy Whitcroft Cc: David Hunter Cc: Dwaipayan Ray Cc: Geert Uytterhoeven Cc: James Bottomley Cc: Jonathan Corbet Cc: Lukas Bulwahn Cc: Menon, Nishanth Cc: Stephen Boyd Cc: Viresh Kumar Signed-off-by: Andrew Morton --- Documentation/dev-tools/checkpatch.rst | 23 +++++++++++++++++++++++ scripts/checkpatch.pl | 6 ++++++ 2 files changed, 29 insertions(+) (limited to 'scripts') diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst index 4dccd1036870..84ccb52e120b 100644 --- a/Documentation/dev-tools/checkpatch.rst +++ b/Documentation/dev-tools/checkpatch.rst @@ -1009,6 +1009,29 @@ Functions and Variables return bar; + **UNINITIALIZED_PTR_WITH_FREE** + Pointers with __free attribute should be declared at the place of use + and initialized (see include/linux/cleanup.h). In this case + declarations at the top of the function rule can be relaxed. Not doing + so may lead to undefined behavior as the memory assigned (garbage, + in case not initialized) to the pointer is freed automatically when + the pointer goes out of scope. + + Also see: https://lore.kernel.org/lkml/58fd478f408a34b578ee8d949c5c4b4da4d4f41d.camel@HansenPartnership.com/ + + Example:: + + type var __free(free_func); + ... // var not used, but, in future someone might add a return here + var = malloc(var_size); + ... + + should be initialized as:: + + ... + type var __free(free_func) = malloc(var_size); + ... + Permissions ----------- diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index d58ca9655ab7..c0250244cf7a 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -7732,6 +7732,12 @@ sub process { ERROR("MISSING_SENTINEL", "missing sentinel in ID array\n" . "$here\n$stat\n"); } } + +# check for uninitialized pointers with __free attribute + while ($line =~ /\*\s*($Ident)\s+__free\s*\(\s*$Ident\s*\)\s*[,;]/g) { + ERROR("UNINITIALIZED_PTR_WITH_FREE", + "pointer '$1' with __free attribute should be initialized\n" . $herecurr); + } } # If we have no input at all, then there is nothing to report on -- cgit v1.2.3