From cde494660f561909ad44a27037c7155454159136 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 22 Aug 2025 16:19:18 +0200 Subject: tools: docs: parse-headers.py: move it from sphinx dir As suggested by Jon, we should start having a tools/docs directory, instead of placing everything under scripts. In the specific case of parse-headers.py, the previous location is where we're placing Sphinx extensions, which is not the right place for execs. Move it to tools/docs/parse-headers.py. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/0f5ac2d704cffe9834e589b39549d2393e1237ef.1755872208.git.mchehab+huawei@kernel.org --- tools/docs/lib/__init__.py | 0 tools/docs/lib/enrich_formatter.py | 70 ++++++ tools/docs/lib/parse_data_structs.py | 398 +++++++++++++++++++++++++++++++++++ tools/docs/parse-headers.py | 57 +++++ 4 files changed, 525 insertions(+) create mode 100644 tools/docs/lib/__init__.py create mode 100644 tools/docs/lib/enrich_formatter.py create mode 100755 tools/docs/lib/parse_data_structs.py create mode 100755 tools/docs/parse-headers.py (limited to 'tools/docs') diff --git a/tools/docs/lib/__init__.py b/tools/docs/lib/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/docs/lib/enrich_formatter.py b/tools/docs/lib/enrich_formatter.py new file mode 100644 index 000000000000..bb171567a4ca --- /dev/null +++ b/tools/docs/lib/enrich_formatter.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2025 by Mauro Carvalho Chehab . + +""" +Ancillary argparse HelpFormatter class that works on a similar way as +argparse.RawDescriptionHelpFormatter, e.g. description maintains line +breaks, but it also implement transformations to the help text. The +actual transformations ar given by enrich_text(), if the output is tty. 
Currently, the following transformations are done:
000000000000..2b7fa6bd8321 --- /dev/null +++ b/tools/docs/lib/parse_data_structs.py @@ -0,0 +1,398 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2016-2025 by Mauro Carvalho Chehab . +# pylint: disable=R0912,R0915 + +""" +Parse a source file or header, creating ReStructured Text cross references. + +It accepts an optional file to change the default symbol reference or to +suppress symbols from the output. + +It is capable of identifying defines, functions, structs, typedefs, +enums and enum symbols and create cross-references for all of them. +It is also capable of distinguish #define used for specifying a Linux +ioctl. + +The optional rules file contains a set of rules like: + + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` +""" + +import os +import re +import sys + + +class ParseDataStructs: + """ + Creates an enriched version of a Kernel header file with cross-links + to each C data structure type. + + It is meant to allow having a more comprehensive documentation, where + uAPI headers will create cross-reference links to the code. + + It is capable of identifying defines, functions, structs, typedefs, + enums and enum symbols and create cross-references for all of them. + It is also capable of distinguish #define used for specifying a Linux + ioctl. + + By default, it create rules for all symbols and defines, but it also + allows parsing an exception file. Such file contains a set of rules + using the syntax below: + + 1. Ignore rules: + + ignore ` + + Removes the symbol from reference generation. + + 2. Replace rules: + + replace + + Replaces how old_symbol with a new reference. The new_reference can be: + - A simple symbol name; + - A full Sphinx reference. + + On both cases, can be: + - ioctl: for defines that end with _IO*, e.g. 
ioctl definitions + - define: for other defines + - symbol: for symbols defined within enums; + - typedef: for typedefs; + - enum: for the name of a non-anonymous enum; + - struct: for structs. + + Examples: + + ignore define __LINUX_MEDIA_H + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` + """ + + # Parser regexes with multiple ways to capture enums and structs + RE_ENUMS = [ + re.compile(r"^\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*enum\s+([\w_]+)\s*$"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"), + ] + RE_STRUCTS = [ + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"), + ] + + # FIXME: the original code was written a long time before Sphinx C + # domain to have multiple namespaces. To avoid to much turn at the + # existing hyperlinks, the code kept using "c:type" instead of the + # right types. To change that, we need to change the types not only + # here, but also at the uAPI media documentation. 
+ DEF_SYMBOL_TYPES = { + "ioctl": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + }, + "define": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + }, + # We're calling each definition inside an enum as "symbol" + "symbol": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + }, + "typedef": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + }, + # This is the name of the enum itself + "enum": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + }, + "struct": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + }, + } + + def __init__(self, debug: bool = False): + """Initialize internal vars""" + self.debug = debug + self.data = "" + + self.symbols = {} + + for symbol_type in self.DEF_SYMBOL_TYPES: + self.symbols[symbol_type] = {} + + def store_type(self, symbol_type: str, symbol: str, + ref_name: str = None, replace_underscores: bool = True): + """ + Stores a new symbol at self.symbols under symbol_type. 
+ + By default, underscores are replaced by "-" + """ + defs = self.DEF_SYMBOL_TYPES[symbol_type] + + prefix = defs.get("prefix", "") + suffix = defs.get("suffix", "") + ref_type = defs.get("ref_type") + + # Determine ref_link based on symbol type + if ref_type: + if symbol_type == "enum": + ref_link = f"{ref_type}:`{symbol}`" + else: + if not ref_name: + ref_name = symbol.lower() + + # c-type references don't support hash + if ref_type == ":ref" and replace_underscores: + ref_name = ref_name.replace("_", "-") + + ref_link = f"{ref_type}:`{symbol} <{ref_name}>`" + else: + ref_link = symbol + + self.symbols[symbol_type][symbol] = f"{prefix}{ref_link}{suffix}" + + def store_line(self, line): + """Stores a line at self.data, properly indented""" + line = " " + line.expandtabs() + self.data += line.rstrip(" ") + + def parse_file(self, file_in: str): + """Reads a C source file and get identifiers""" + self.data = "" + is_enum = False + is_comment = False + multiline = "" + + with open(file_in, "r", + encoding="utf-8", errors="backslashreplace") as f: + for line_no, line in enumerate(f): + self.store_line(line) + line = line.strip("\n") + + # Handle continuation lines + if line.endswith(r"\\"): + multiline += line[-1] + continue + + if multiline: + line = multiline + line + multiline = "" + + # Handle comments. 
They can be multilined + if not is_comment: + if re.search(r"/\*.*", line): + is_comment = True + else: + # Strip C99-style comments + line = re.sub(r"(//.*)", "", line) + + if is_comment: + if re.search(r".*\*/", line): + is_comment = False + else: + multiline = line + continue + + # At this point, line variable may be a multilined statement, + # if lines end with \ or if they have multi-line comments + # With that, it can safely remove the entire comments, + # and there's no need to use re.DOTALL for the logic below + + line = re.sub(r"(/\*.*\*/)", "", line) + if not line.strip(): + continue + + # It can be useful for debug purposes to print the file after + # having comments stripped and multi-lines grouped. + if self.debug > 1: + print(f"line {line_no + 1}: {line}") + + # Now the fun begins: parse each type and store it. + + # We opted for a two parsing logic here due to: + # 1. it makes easier to debug issues not-parsed symbols; + # 2. we want symbol replacement at the entire content, not + # just when the symbol is detected. 
+ + if is_enum: + match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line) + if match: + self.store_type("symbol", match.group(1)) + if "}" in line: + is_enum = False + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line) + if match: + self.store_type("ioctl", match.group(1), + replace_underscores=False) + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line) + if match: + self.store_type("define", match.group(1)) + continue + + match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);", + line) + if match: + name = match.group(2).strip() + symbol = match.group(3) + self.store_type("typedef", symbol, ref_name=name) + continue + + for re_enum in self.RE_ENUMS: + match = re_enum.match(line) + if match: + self.store_type("enum", match.group(1)) + is_enum = True + break + + for re_struct in self.RE_STRUCTS: + match = re_struct.match(line) + if match: + self.store_type("struct", match.group(1)) + break + + def process_exceptions(self, fname: str): + """ + Process exceptions file with rules to ignore or replace references. 
+ """ + if not fname: + return + + name = os.path.basename(fname) + + with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f: + for ln, line in enumerate(f): + ln += 1 + line = line.strip() + if not line or line.startswith("#"): + continue + + # Handle ignore rules + match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line) + if match: + c_type = match.group(1) + symbol = match.group(2) + + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + d = self.symbols[c_type] + if symbol in d: + del d[symbol] + + continue + + # Handle replace rules + match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line) + if not match: + sys.exit(f"{name}:{ln}: invalid line: {line}") + + c_type, old, new = match.groups() + + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + reftype = None + + # Parse reference type when the type is specified + + match = re.match(r"^\:c\:(data|func|macro|type)\:\`(.+)\`", new) + if match: + reftype = f":c:{match.group(1)}" + new = match.group(2) + else: + match = re.search(r"(\:ref)\:\`(.+)\`", new) + if match: + reftype = match.group(1) + new = match.group(2) + + # If the replacement rule doesn't have a type, get default + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type") + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type") + + new_ref = f"{reftype}:`{old} <{new}>`" + + # Change self.symbols to use the replacement rule + if old in self.symbols[c_type]: + self.symbols[c_type][old] = new_ref + else: + print(f"{name}:{ln}: Warning: can't find {old} {c_type}") + + def debug_print(self): + """ + Print debug information containing the replacement rules per symbol. + To make easier to check, group them per type. 
+ """ + if not self.debug: + return + + for c_type, refs in self.symbols.items(): + if not refs: # Skip empty dictionaries + continue + + print(f"{c_type}:") + + for symbol, ref in sorted(refs.items()): + print(f" {symbol} -> {ref}") + + print() + + def write_output(self, file_in: str, file_out: str): + """Write the formatted output to a file.""" + + # Avoid extra blank lines + text = re.sub(r"\s+$", "", self.data) + "\n" + text = re.sub(r"\n\s+\n", "\n\n", text) + + # Escape Sphinx special characters + text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text) + + # Source uAPI files may have special notes. Use bold font for them + text = re.sub(r"DEPRECATED", "**DEPRECATED**", text) + + # Delimiters to catch the entire symbol after escaped + start_delim = r"([ \n\t\(=\*\@])" + end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)" + + # Process all reference types + for ref_dict in self.symbols.values(): + for symbol, replacement in ref_dict.items(): + symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol)) + text = re.sub(fr'{start_delim}{symbol}{end_delim}', + fr'\1{replacement}\2', text) + + # Remove "\ " where not needed: before spaces and at the end of lines + text = re.sub(r"\\ ([\n ])", r"\1", text) + text = re.sub(r" \\ ", " ", text) + + + title = os.path.basename(file_in) + + with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f: + f.write(".. -*- coding: utf-8; mode: rst -*-\n\n") + f.write(f"{title}\n") + f.write("=" * len(title)) + f.write("\n\n.. parsed-literal::\n\n") + f.write(text) diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py new file mode 100755 index 000000000000..07d3b47c4834 --- /dev/null +++ b/tools/docs/parse-headers.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2016, 2025 by Mauro Carvalho Chehab . 
+# pylint: disable=C0103 + +""" +Convert a C header or source file ``FILE_IN``, into a ReStructured Text +included via ..parsed-literal block with cross-references for the +documentation files that describe the API. It accepts an optional +``FILE_RULES`` file to describes what elements will be either ignored or +be pointed to a non-default reference type/name. + +The output is written at ``FILE_OUT``. + +It is capable of identifying defines, functions, structs, typedefs, +enums and enum symbols and create cross-references for all of them. +It is also capable of distinguish #define used for specifying a Linux +ioctl. + +The optional ``FILE_RULES`` contains a set of rules like: + + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` +""" + +import argparse + +from lib.parse_data_structs import ParseDataStructs +from lib.enrich_formatter import EnrichFormatter + +def main(): + """Main function""" + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=EnrichFormatter) + + parser.add_argument("-d", "--debug", action="count", default=0, + help="Increase debug level. 
Can be used multiple times") + parser.add_argument("file_in", help="Input C file") + parser.add_argument("file_out", help="Output RST file") + parser.add_argument("file_rules", nargs="?", + help="Exceptions file (optional)") + + args = parser.parse_args() + + parser = ParseDataStructs(debug=args.debug) + parser.parse_file(args.file_in) + + if args.file_rules: + parser.process_exceptions(args.file_rules) + + parser.debug_print() + parser.write_output(args.file_in, args.file_out) + + +if __name__ == "__main__": + main() -- cgit v1.2.3 From 242cfe3f774e8a41d0b27d4664247f58d0a8d039 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 22 Aug 2025 16:19:19 +0200 Subject: tools: docs: parse_data_structs.py: add methods to return output When running it from command line, we want to write an output file, but when used as a class, one may just want the output content returned as a string. Split write_output() on two methods to allow both usecases. Also add an extra method to produce a TOC. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/c98bdec3380aad54178baf2751a2f1fcd128576b.1755872208.git.mchehab+huawei@kernel.org --- tools/docs/lib/parse_data_structs.py | 62 +++++++++++++++++++++++++++++++++--- tools/docs/parse-headers.py | 5 ++- 2 files changed, 62 insertions(+), 5 deletions(-) (limited to 'tools/docs') diff --git a/tools/docs/lib/parse_data_structs.py b/tools/docs/lib/parse_data_structs.py index 2b7fa6bd8321..a5aa2e182052 100755 --- a/tools/docs/lib/parse_data_structs.py +++ b/tools/docs/lib/parse_data_structs.py @@ -97,33 +97,39 @@ class ParseDataStructs: "prefix": "\\ ", "suffix": "\\ ", "ref_type": ":ref", + "description": "IOCTL Commands", }, "define": { "prefix": "\\ ", "suffix": "\\ ", "ref_type": ":ref", + "description": "Macros and Definitions", }, # We're calling each definition inside an enum as "symbol" "symbol": { "prefix": "\\ ", "suffix": "\\ ", "ref_type": ":ref", + "description": "Enumeration values", }, "typedef": { "prefix": "\\ ", "suffix": "\\ ", "ref_type": ":c:type", + "description": "Type Definitions", }, - # This is the name of the enum itself + # This is the description of the enum itself "enum": { "prefix": "\\ ", "suffix": "\\ ", "ref_type": ":c:type", + "description": "Enumerations", }, "struct": { "prefix": "\\ ", "suffix": "\\ ", "ref_type": ":c:type", + "description": "Structures", }, } @@ -359,7 +365,7 @@ class ParseDataStructs: print() - def write_output(self, file_in: str, file_out: str): + def gen_output(self): """Write the formatted output to a file.""" # Avoid extra blank lines @@ -387,12 +393,60 @@ class ParseDataStructs: text = re.sub(r"\\ ([\n ])", r"\1", text) text = re.sub(r" \\ ", " ", text) + return text + def gen_toc(self): + """ + Create a TOC table pointing to each symbol from the header + """ + text = [] + + # Add header + text.append(".. 
contents:: Table of Contents") + text.append(" :depth: 2") + text.append(" :local:") + text.append("") + + # Sort symbol types per description + symbol_descriptions = [] + for k, v in self.DEF_SYMBOL_TYPES.items(): + symbol_descriptions.append((v['description'], k)) + + symbol_descriptions.sort() + + # Process each category + for description, c_type in symbol_descriptions: + + refs = self.symbols[c_type] + if not refs: # Skip empty categories + continue + + text.append(f"{description}") + text.append("-" * len(description)) + text.append("") + + # Sort symbols alphabetically + for symbol, ref in sorted(refs.items()): + text.append(f"* :{ref}:") + + text.append("") # Add empty line between categories + + return "\n".join(text) + + def write_output(self, file_in: str, file_out: str, toc: bool): title = os.path.basename(file_in) + if toc: + text = self.gen_toc() + else: + text = self.gen_output() + with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f: f.write(".. -*- coding: utf-8; mode: rst -*-\n\n") f.write(f"{title}\n") - f.write("=" * len(title)) - f.write("\n\n.. parsed-literal::\n\n") + f.write("=" * len(title) + "\n\n") + + if not toc: + f.write(".. parsed-literal::\n\n") + f.write(text) diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py index 07d3b47c4834..bfa4e46a53e3 100755 --- a/tools/docs/parse-headers.py +++ b/tools/docs/parse-headers.py @@ -36,6 +36,9 @@ def main(): parser.add_argument("-d", "--debug", action="count", default=0, help="Increase debug level. 
Can be used multiple times") + parser.add_argument("-t", "--toc", action="store_true", + help="instead of a literal block, outputs a TOC table at the RST file") + parser.add_argument("file_in", help="Input C file") parser.add_argument("file_out", help="Output RST file") parser.add_argument("file_rules", nargs="?", @@ -50,7 +53,7 @@ def main(): parser.process_exceptions(args.file_rules) parser.debug_print() - parser.write_output(args.file_in, args.file_out) + parser.write_output(args.file_in, args.file_out, args.toc) if __name__ == "__main__": -- cgit v1.2.3 From 2f1c96018b10e6d42280db3a6faa47d80c961b6b Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 5 Sep 2025 16:46:06 +0200 Subject: docs: add tools/docs/gen-renames.py Add a new script that wraps git to trawl the repository history for renames of .rst files in the Documentation/ directory. Example usage: tools/docs/gen-renames.py --rev v6.17-rc3 > Documentation/.renames.txt The output format is simply: SPACE NEWLINE where neither nor contain the Documentation/ prefix or the .rst suffix. The file is sorted alphabetically. We can suggest rerunning the script for future renames (and squash the resulting change) or rerun it periodically to keep the file up to date. Signed-off-by: Vegard Nossum Signed-off-by: Jonathan Corbet Message-ID: <20250905144608.577449-2-vegard.nossum@oracle.com> --- tools/docs/gen-renames.py | 130 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100755 tools/docs/gen-renames.py (limited to 'tools/docs') diff --git a/tools/docs/gen-renames.py b/tools/docs/gen-renames.py new file mode 100755 index 000000000000..8cb3b2157d83 --- /dev/null +++ b/tools/docs/gen-renames.py @@ -0,0 +1,130 @@ +#! /usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright © 2025, Oracle and/or its affiliates. +# Author: Vegard Nossum + +"""Trawl repository history for renames of Documentation/**.rst files. 
+ +Example: + + tools/docs/gen-renames.py --rev HEAD > Documentation/.renames.txt +""" + +import argparse +import itertools +import os +import subprocess +import sys + +parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) +parser.add_argument('--rev', default='HEAD', help='generate renames up to this revision') + +args = parser.parse_args() + +def normalize(path): + prefix = 'Documentation/' + suffix = '.rst' + + assert path.startswith(prefix) + assert path.endswith(suffix) + + return path[len(prefix):-len(suffix)] + +class Name(object): + def __init__(self, name): + self.names = [name] + + def rename(self, new_name): + self.names.append(new_name) + +names = { +} + +for line in subprocess.check_output([ + 'git', 'log', + '--reverse', + '--oneline', + '--find-renames', + '--diff-filter=RD', + '--name-status', + '--format=commit %H', + # ~v4.8-ish is when Sphinx/.rst was added in the first place + f'v4.8..{args.rev}', + '--', + 'Documentation/' +], text=True).splitlines(): + # rename + if line.startswith('R'): + _, old, new = line[1:].split('\t', 2) + + if old.endswith('.rst') and new.endswith('.rst'): + old = normalize(old) + new = normalize(new) + + name = names.get(old) + if name is None: + name = Name(old) + else: + del names[old] + + name.rename(new) + names[new] = name + + continue + + # delete + if line.startswith('D'): + _, old = line.split('\t', 1) + + if old.endswith('.rst'): + old = normalize(old) + + # TODO: we could save added/modified files as well and propose + # them as alternatives + name = names.get(old) + if name is None: + pass + else: + del names[old] + + continue + +# +# Get the set of current files so we can sanity check that we aren't +# redirecting any of those +# + +current_files = set() +for line in subprocess.check_output([ + 'git', 'ls-tree', + '-r', + '--name-only', + args.rev, + 'Documentation/', +], text=True).splitlines(): + if line.endswith('.rst'): + 
current_files.add(normalize(line)) + +# +# Format/group/output result +# + +result = [] +for _, v in names.items(): + old_names = v.names[:-1] + new_name = v.names[-1] + + for old_name in old_names: + if old_name == new_name: + # A file was renamed to its new name twice; don't redirect that + continue + + if old_name in current_files: + # A file was recreated with a former name; don't redirect those + continue + + result.append((old_name, new_name)) + +for old_name, new_name in sorted(result): + print(f"{old_name} {new_name}") -- cgit v1.2.3 From f2c2f6490085e29521f87d5464b2cdceff0f0c7a Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 5 Sep 2025 16:46:08 +0200 Subject: docs: add tools/docs/gen-redirects.py Add a new script and a new documentation 'make' target, htmldocs-redirects. This will generate HTML stub files in the HTML documentation output directory that redirect the browser to the new path. Suggested-by: Konstantin Ryabitsev Suggested-by: Jonathan Corbet Signed-off-by: Vegard Nossum Signed-off-by: Jonathan Corbet Message-ID: <20250905144608.577449-4-vegard.nossum@oracle.com> --- Documentation/Makefile | 4 ++++ Makefile | 5 +++-- tools/docs/gen-redirects.py | 54 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100755 tools/docs/gen-redirects.py (limited to 'tools/docs') diff --git a/Documentation/Makefile b/Documentation/Makefile index 5c20c68be89a..3609cb86137b 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -108,6 +108,9 @@ htmldocs: @$(srctree)/scripts/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var))) +htmldocs-redirects: $(srctree)/Documentation/.renames.txt + @tools/docs/gen-redirects.py --output $(BUILDDIR) < $< + # If Rust support is available and .config exists, add rustdoc generated contents. 
# If there are any, the errors from this make rustdoc will be displayed but # won't stop the execution of htmldocs @@ -175,6 +178,7 @@ cleandocs: dochelp: @echo ' Linux kernel internal documentation in different formats from ReST:' @echo ' htmldocs - HTML' + @echo ' htmldocs-redirects - generate HTML redirects for moved pages' @echo ' texinfodocs - Texinfo' @echo ' infodocs - Info' @echo ' latexdocs - LaTeX' diff --git a/Makefile b/Makefile index 6bfe776bf3c5..d764afe3a30a 100644 --- a/Makefile +++ b/Makefile @@ -1799,8 +1799,9 @@ $(help-board-dirs): help-%: # Documentation targets # --------------------------------------------------------------------------- -DOC_TARGETS := xmldocs latexdocs pdfdocs htmldocs epubdocs cleandocs \ - linkcheckdocs dochelp refcheckdocs texinfodocs infodocs +DOC_TARGETS := xmldocs latexdocs pdfdocs htmldocs htmldocs-redirects \ + epubdocs cleandocs linkcheckdocs dochelp refcheckdocs \ + texinfodocs infodocs PHONY += $(DOC_TARGETS) $(DOC_TARGETS): $(Q)$(MAKE) $(build)=Documentation $@ diff --git a/tools/docs/gen-redirects.py b/tools/docs/gen-redirects.py new file mode 100755 index 000000000000..6a6ebf6f42dc --- /dev/null +++ b/tools/docs/gen-redirects.py @@ -0,0 +1,54 @@ +#! /usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright © 2025, Oracle and/or its affiliates. +# Author: Vegard Nossum + +"""Generate HTML redirects for renamed Documentation/**.rst files using +the output of tools/docs/gen-renames.py. 
+ +Example: + + tools/docs/gen-redirects.py --output Documentation/output/ < Documentation/.renames.txt +""" + +import argparse +import os +import sys + +parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) +parser.add_argument('-o', '--output', help='output directory') + +args = parser.parse_args() + +for line in sys.stdin: + line = line.rstrip('\n') + + old_name, new_name = line.split(' ', 2) + + old_html_path = os.path.join(args.output, old_name + '.html') + new_html_path = os.path.join(args.output, new_name + '.html') + + if not os.path.exists(new_html_path): + print(f"warning: target does not exist: {new_html_path} (redirect from {old_html_path})") + continue + + old_html_dir = os.path.dirname(old_html_path) + if not os.path.exists(old_html_dir): + os.makedirs(old_html_dir) + + relpath = os.path.relpath(new_name, os.path.dirname(old_name)) + '.html' + + with open(old_html_path, 'w') as f: + print(f"""\ + + + + + This page has moved + + + +

This page has moved to {new_name}.

+ +""", file=f) -- cgit v1.2.3