summaryrefslogtreecommitdiff
path: root/scripts/lib/abi/abi_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/lib/abi/abi_parser.py')
-rw-r--r--scripts/lib/abi/abi_parser.py512
1 files changed, 512 insertions, 0 deletions
diff --git a/scripts/lib/abi/abi_parser.py b/scripts/lib/abi/abi_parser.py
new file mode 100644
index 000000000000..b3fa70eee412
--- /dev/null
+++ b/scripts/lib/abi/abi_parser.py
@@ -0,0 +1,512 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+from argparse import Namespace
+import logging
+import os
+import re
+
+from glob import glob
+from pprint import pformat
+from random import randrange, seed
+
+# Import Python modules
+
+from helpers import AbiDebug, ABI_DIR
+
+
+class AbiParser:
+ """Main class to parse ABI files"""
+
+ TAGS = r"(what|where|date|kernelversion|contact|description|users)"
+ XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
+
+ def __init__(self, directory, logger=None,
+ enable_lineno=False, show_warnings=True, debug=0):
+ """Stores arguments for the class and initialize class vars"""
+
+ self.directory = directory
+ self.enable_lineno = enable_lineno
+ self.show_warnings = show_warnings
+ self.debug = debug
+
+ if not logger:
+ self.log = logging.getLogger("get_abi")
+ else:
+ self.log = logger
+
+ self.data = {}
+ self.what_symbols = {}
+ self.file_refs = {}
+ self.what_refs = {}
+
+ # Regular expressions used on parser
+ self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
+ self.re_valid = re.compile(self.TAGS)
+ self.re_start_spc = re.compile(r"(\s*)(\S.*)")
+ self.re_whitespace = re.compile(r"^\s+")
+
+ # Regular used on print
+ self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
+ self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
+ self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
+ self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
+ self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
+ self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
+ self.re_xref_node = re.compile(self.XREF)
+
+ def warn(self, fdata, msg, extra=None):
+ """Displays a parse error if warning is enabled"""
+
+ if not self.show_warnings:
+ return
+
+ msg = f"{fdata.fname}:{fdata.ln}: {msg}"
+ if extra:
+ msg += "\n\t\t" + extra
+
+ self.log.warning(msg)
+
+ def add_symbol(self, what, fname, ln=None, xref=None):
+ """Create a reference table describing where each 'what' is located"""
+
+ if what not in self.what_symbols:
+ self.what_symbols[what] = {"file": {}}
+
+ if fname not in self.what_symbols[what]["file"]:
+ self.what_symbols[what]["file"][fname] = []
+
+ if ln and ln not in self.what_symbols[what]["file"][fname]:
+ self.what_symbols[what]["file"][fname].append(ln)
+
+ if xref:
+ self.what_symbols[what]["xref"] = xref
+
+ def _parse_line(self, fdata, line):
+ """Parse a single line of an ABI file"""
+
+ new_what = False
+ new_tag = False
+ content = None
+
+ match = self.re_tag.match(line)
+ if match:
+ new = match.group(1).lower()
+ sep = match.group(2)
+ content = match.group(3)
+
+ match = self.re_valid.search(new)
+ if match:
+ new_tag = match.group(1)
+ else:
+ if fdata.tag == "description":
+ # New "tag" is actually part of description.
+ # Don't consider it a tag
+ new_tag = False
+ elif fdata.tag != "":
+ self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)
+
+ if new_tag:
+ # "where" is Invalid, but was a common mistake. Warn if found
+ if new_tag == "where":
+ self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
+ new_tag = "what"
+
+ if new_tag == "what":
+ fdata.space = None
+
+ if content not in self.what_symbols:
+ self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)
+
+ if fdata.tag == "what":
+ fdata.what.append(content.strip("\n"))
+ else:
+ if fdata.key:
+ if "description" not in self.data.get(fdata.key, {}):
+ self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+ for w in fdata.what:
+ self.add_symbol(what=w, fname=fdata.fname,
+ ln=fdata.what_ln, xref=fdata.key)
+
+ fdata.label = content
+ new_what = True
+
+ key = "abi_" + content.lower()
+ fdata.key = self.re_unprintable.sub("_", key).strip("_")
+
+ # Avoid duplicated keys but using a defined seed, to make
+ # the namespace identical if there aren't changes at the
+ # ABI symbols
+ seed(42)
+
+ while fdata.key in self.data:
+ char = randrange(0, 51) + ord("A")
+ if char > ord("Z"):
+ char += ord("a") - ord("Z") - 1
+
+ fdata.key += chr(char)
+
+ if fdata.key and fdata.key not in self.data:
+ self.data[fdata.key] = {
+ "what": [content],
+ "file": [fdata.file_ref],
+ "line_no": fdata.ln,
+ }
+
+ fdata.what = self.data[fdata.key]["what"]
+
+ self.what_refs[content] = fdata.key
+ fdata.tag = new_tag
+ fdata.what_ln = fdata.ln
+
+ if fdata.nametag["what"]:
+ t = (content, fdata.key)
+ if t not in fdata.nametag["symbols"]:
+ fdata.nametag["symbols"].append(t)
+
+ return
+
+ if fdata.tag and new_tag:
+ fdata.tag = new_tag
+
+ if new_what:
+ fdata.label = ""
+
+ self.data[fdata.key]["type"] = fdata.ftype
+
+ if "description" in self.data[fdata.key]:
+ self.data[fdata.key]["description"] += "\n\n"
+
+ if fdata.file_ref not in self.data[fdata.key]["file"]:
+ self.data[fdata.key]["file"].append(fdata.file_ref)
+
+ if self.debug == AbiDebug.WHAT_PARSING:
+ self.log.debug("what: %s", fdata.what)
+
+ if not fdata.what:
+ self.warn(fdata, "'What:' should come first:", line)
+ return
+
+ if new_tag == "description":
+ fdata.space = None
+
+ if content:
+ sep = sep.replace(":", " ")
+
+ c = " " * len(new_tag) + sep + content
+ c = c.expandtabs()
+
+ match = self.re_start_spc.match(c)
+ if match:
+ # Preserve initial spaces for the first line
+ fdata.space = match.group(1)
+ content = match.group(2) + "\n"
+
+ self.data[fdata.key][fdata.tag] = content
+
+ return
+
+ # Store any contents before tags at the database
+ if not fdata.tag and "what" in fdata.nametag:
+ fdata.nametag["description"] += line
+ return
+
+ if fdata.tag == "description":
+ content = line.expandtabs()
+
+ if self.re_whitespace.sub("", content) == "":
+ self.data[fdata.key][fdata.tag] += "\n"
+ return
+
+ if fdata.space is None:
+ match = self.re_start_spc.match(content)
+ if match:
+ # Preserve initial spaces for the first line
+ fdata.space = match.group(1)
+
+ content = match.group(2) + "\n"
+ else:
+ if content.startswith(fdata.space):
+ content = content[len(fdata.space):]
+
+ else:
+ fdata.space = ""
+
+ if fdata.tag == "what":
+ w = content.strip("\n")
+ if w:
+ self.data[fdata.key][fdata.tag].append(w)
+ else:
+ self.data[fdata.key][fdata.tag] += content
+ return
+
+ content = line.strip()
+ if fdata.tag:
+ if fdata.tag == "what":
+ w = content.strip("\n")
+ if w:
+ self.data[fdata.key][fdata.tag].append(w)
+ else:
+ self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
+ return
+
+ # Everything else is error
+ if content:
+ self.warn(fdata, "Unexpected content", line)
+
+ def parse_file(self, fname, path, basename):
+ """Parse a single file"""
+
+ ref = f"abi_file_{path}_{basename}"
+ ref = self.re_unprintable.sub("_", ref).strip("_")
+
+ # Store per-file state into a namespace variable. This will be used
+ # by the per-line parser state machine and by the warning function.
+ fdata = Namespace
+
+ fdata.fname = fname
+ fdata.name = basename
+
+ pos = fname.find(ABI_DIR)
+ if pos > 0:
+ f = fname[pos:]
+ else:
+ f = fname
+
+ fdata.file_ref = (f, ref)
+ self.file_refs[f] = ref
+
+ fdata.ln = 0
+ fdata.what_ln = 0
+ fdata.tag = ""
+ fdata.label = ""
+ fdata.what = []
+ fdata.key = None
+ fdata.xrefs = None
+ fdata.space = None
+ fdata.ftype = path.split("/")[0]
+
+ fdata.nametag = {}
+ fdata.nametag["what"] = [f"File {path}/{basename}"]
+ fdata.nametag["type"] = "File"
+ fdata.nametag["file"] = [fdata.file_ref]
+ fdata.nametag["line_no"] = 1
+ fdata.nametag["description"] = ""
+ fdata.nametag["symbols"] = []
+
+ self.data[ref] = fdata.nametag
+
+ if self.debug & AbiDebug.WHAT_OPEN:
+ self.log.debug("Opening file %s", fname)
+
+ with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
+ for line in fp:
+ fdata.ln += 1
+
+ self._parse_line(fdata, line)
+
+ if "description" in fdata.nametag:
+ fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
+
+ if fdata.key:
+ if "description" not in self.data.get(fdata.key, {}):
+ self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+ for w in fdata.what:
+ self.add_symbol(what=w, fname=fname, xref=fdata.key)
+
+ def parse_abi(self):
+ """Parse documentation ABI"""
+
+ ignore_suffixes = ("rej", "org", "orig", "bak", "~")
+ re_abi = re.compile(r".*" + ABI_DIR)
+
+ for fname in glob(os.path.join(self.directory, "**"), recursive=True):
+ if os.path.isdir(fname):
+ continue
+
+ basename = os.path.basename(fname)
+
+ if basename == "README":
+ continue
+ if basename.startswith(".") or basename.endswith(ignore_suffixes):
+ continue
+
+ path = re_abi.sub("", os.path.dirname(fname))
+
+ self.parse_file(fname, path, basename)
+
+ if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
+ self.log.debug(pformat(self.data))
+
+ def print_desc_txt(self, desc):
+ """Print description as found inside ABI files"""
+
+ desc = desc.strip(" \t\n")
+
+ print(desc + "\n")
+
+ def print_desc_rst(self, desc):
+ """Enrich ReST output by creating cross-references"""
+
+ # Remove title markups from the description
+ # Having titles inside ABI files will only work if extra
+ # care would be taken in order to strictly follow the same
+ # level order for each markup.
+ desc = self.re_title_mark.sub("\n\n", "\n" + desc)
+ desc = desc.rstrip(" \t\n").lstrip("\n")
+
+ # Python's regex performance for non-compiled expressions is a lot
+ # than Perl, as Perl automatically caches them at their
+ # first usage. Here, we'll need to do the same, as otherwise the
+ # performance penalty is be high
+
+ new_desc = ""
+ for d in desc.split("\n"):
+ if d == "":
+ new_desc += "\n"
+ continue
+
+ # Use cross-references for doc files where needed
+ d = self.re_doc.sub(r":doc:`/\1`", d)
+
+ # Use cross-references for ABI generated docs where needed
+ matches = self.re_abi.findall(d)
+ for m in matches:
+ abi = m[0] + m[1]
+
+ xref = self.file_refs.get(abi)
+ if not xref:
+ # This may happen if ABI is on a separate directory,
+ # like parsing ABI testing and symbol is at stable.
+ # The proper solution is to move this part of the code
+ # for it to be inside sphinx/kernel_abi.py
+ self.log.info("Didn't find ABI reference for '%s'", abi)
+ else:
+ new = self.re_escape.sub(r"\\\1", m[1])
+ d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
+
+ # Seek for cross reference symbols like /sys/...
+ # Need to be careful to avoid doing it on a code block
+ if d[0] not in [" ", "\t"]:
+ matches = self.re_xref_node.findall(d)
+ for m in matches:
+ # Finding ABI here is more complex due to wildcards
+ xref = self.what_refs.get(m)
+ if xref:
+ new = self.re_escape.sub(r"\\\1", m)
+ d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
+
+ new_desc += d + "\n"
+
+ print(new_desc + "\n")
+
+ def print_data(self, enable_lineno, output_in_txt, show_file=False):
+ """Print ABI at stdout"""
+
+ part = None
+ for key, v in sorted(self.data.items(),
+ key=lambda x: (x[1].get("type", ""),
+ x[1].get("what"))):
+
+ wtype = v.get("type", "Var")
+ file_ref = v.get("file")
+ names = v.get("what", [""])
+
+ if not show_file and wtype == "File":
+ continue
+
+ if enable_lineno:
+ ln = v.get("line_no", 1)
+ print(f".. LINENO {file_ref[0][0]}#{ln}\n")
+
+ if wtype != "File":
+ cur_part = names[0]
+ if cur_part.find("/") >= 0:
+ match = self.re_what.match(cur_part)
+ if match:
+ symbol = match.group(1).rstrip("/")
+ cur_part = "Symbols under " + symbol
+
+ if cur_part and cur_part != part:
+ part = cur_part
+ print(f"{part}\n{"-" * len(part)}\n")
+
+ print(f".. _{key}:\n")
+
+ max_len = 0
+ for i in range(0, len(names)): # pylint: disable=C0200
+ names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
+
+ max_len = max(max_len, len(names[i]))
+
+ print("+-" + "-" * max_len + "-+")
+ for name in names:
+ print(f"| {name}" + " " * (max_len - len(name)) + " |")
+ print("+-" + "-" * max_len + "-+")
+ print()
+
+ for ref in file_ref:
+ if wtype == "File":
+ print(f".. _{ref[1]}:\n")
+ else:
+ base = os.path.basename(ref[0])
+ print(f"Defined on file :ref:`{base} <{ref[1]}>`\n")
+
+ if wtype == "File":
+ print(f"{names[0]}\n{"-" * len(names[0])}\n")
+
+ desc = v.get("description")
+ if not desc and wtype != "File":
+ print(f"DESCRIPTION MISSING for {names[0]}\n")
+
+ if desc:
+ if output_in_txt:
+ self.print_desc_txt(desc)
+ else:
+ self.print_desc_rst(desc)
+
+ symbols = v.get("symbols")
+ if symbols:
+ print("Has the following ABI:\n")
+
+ for w, label in symbols:
+ # Escape special chars from content
+ content = self.re_escape.sub(r"\\\1", w)
+
+ print(f"- :ref:`{content} <{label}>`\n")
+
+ users = v.get("users")
+ if users and users.strip(" \t\n"):
+ print(f"Users:\n\t{users.strip("\n").replace('\n', '\n\t')}\n")
+
+ def check_issues(self):
+ """Warn about duplicated ABI entries"""
+
+ for what, v in self.what_symbols.items():
+ files = v.get("file")
+ if not files:
+ # Should never happen if the parser works properly
+ self.log.warning("%s doesn't have a file associated", what)
+ continue
+
+ if len(files) == 1:
+ continue
+
+ f = []
+ for fname, lines in sorted(files.items()):
+ if not lines:
+ f.append(f"{fname}")
+ elif len(lines) == 1:
+ f.append(f"{fname}:{lines[0]}")
+ else:
+ f.append(f"{fname} lines {", ".join(str(x) for x in lines)}")
+
+ self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))