diff options
86 files changed, 4808 insertions, 172 deletions
diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index f9b7bcb5a630..2d73e8697523 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -32,3 +32,4 @@ Linux Tracing Technologies sys-t coresight/index user_events + rv/index diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index b175d88f31eb..4274cc6a2f94 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -28,10 +28,10 @@ Synopsis of kprobe_events ------------------------- :: - p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe - r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe - p:[GRP/]EVENT] [MOD:]SYM[+0]%return [FETCHARGS] : Set a return probe - -:[GRP/]EVENT : Clear a probe + p[:[GRP/][EVENT]] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe + r[MAXACTIVE][:[GRP/][EVENT]] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe + p[:[GRP/][EVENT]] [MOD:]SYM[+0]%return [FETCHARGS] : Set a return probe + -:[GRP/][EVENT] : Clear a probe GRP : Group name. If omitted, use "kprobes" for it. EVENT : Event name. If omitted, the event name is generated diff --git a/Documentation/trace/rv/da_monitor_instrumentation.rst b/Documentation/trace/rv/da_monitor_instrumentation.rst new file mode 100644 index 000000000000..6c67c7b57811 --- /dev/null +++ b/Documentation/trace/rv/da_monitor_instrumentation.rst @@ -0,0 +1,171 @@ +Deterministic Automata Instrumentation +====================================== + +The RV monitor file created by dot2k, with the name "$MODEL_NAME.c" +includes a section dedicated to instrumentation. + +In the example of the wip.dot monitor created on [1], it will look like:: + + /* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ + static void handle_preempt_disable(void *data, /* XXX: fill header */) + { + da_handle_event_wip(preempt_disable_wip); + } + + static void handle_preempt_enable(void *data, /* XXX: fill header */) + { + da_handle_event_wip(preempt_enable_wip); + } + + static void handle_sched_waking(void *data, /* XXX: fill header */) + { + da_handle_event_wip(sched_waking_wip); + } + + static int enable_wip(void) + { + int retval; + + retval = da_monitor_init_wip(); + if (retval) + return retval; + + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_disable); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_enable); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_sched_waking); + + return 0; + } + +The comment at the top of the section explains the general idea: the +instrumentation section translates *kernel events* into the *model's +event*. + +Tracing callback functions +-------------------------- + +The first three functions are the starting point of the callback *handler +functions* for each of the three events from the wip model. The developer +does not necessarily need to use them: they are just starting points. + +Using the example of:: + + void handle_preempt_disable(void *data, /* XXX: fill header */) + { + da_handle_event_wip(preempt_disable_wip); + } + +The preempt_disable event from the model connects directly to the +preemptirq:preempt_disable. The preemptirq:preempt_disable event +has the following signature, from include/trace/events/preemptirq.h:: + + TP_PROTO(unsigned long ip, unsigned long parent_ip) + +Hence, the handle_preempt_disable() function will look like:: + + void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip) + +In this case, the kernel event translates one to one with the automata +event, and indeed, no other change is required for this function. + +The next handler function, handle_preempt_enable() has the same argument +list from the handle_preempt_disable(). The difference is that the +preempt_enable event will be used to synchronize the system to the model. + +Initially, the *model* is placed in the initial state. However, the *system* +might or might not be in the initial state. The monitor cannot start +processing events until it knows that the system has reached the initial state. +Otherwise, the monitor and the system could be out-of-sync. + +Looking at the automata definition, it is possible to see that the system +and the model are expected to return to the initial state after the +preempt_enable execution. Hence, it can be used to synchronize the +system and the model at the initialization of the monitoring section. + +The start is informed via a special handle function, the +"da_handle_start_event_$(MONITOR_NAME)(event)", in this case:: + + da_handle_start_event_wip(preempt_enable_wip); + +So, the callback function will look like:: + + void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip) + { + da_handle_start_event_wip(preempt_enable_wip); + } + +Finally, the "handle_sched_waking()" will look like:: + + void handle_sched_waking(void *data, struct task_struct *task) + { + da_handle_event_wip(sched_waking_wip); + } + +And the explanation is left for the reader as an exercise. + +enable and disable functions +---------------------------- + +dot2k automatically creates two special functions:: + + enable_$(MONITOR_NAME)() + disable_$(MONITOR_NAME)() + +These functions are called when the monitor is enabled and disabled, +respectively. + +They should be used to *attach* and *detach* the instrumentation to the running +system. The developer must add to the relative function all that is needed to +*attach* and *detach* its monitor to the system. + +For the wip case, these functions were named:: + + enable_wip() + disable_wip() + +But no change was required because: by default, these functions *attach* and +*detach* the tracepoints_to_attach, which was enough for this case. + +Instrumentation helpers +----------------------- + +To complete the instrumentation, the *handler functions* need to be attached to a +kernel event, at the monitoring enable phase. + +The RV interface also facilitates this step. For example, the macro "rv_attach_trace_probe()" +is used to connect the wip model events to the relative kernel event. dot2k automatically +adds "rv_attach_trace_probe()" function call for each model event in the enable phase, as +a suggestion. + +For example, from the wip sample model:: + + static int enable_wip(void) + { + int retval; + + retval = da_monitor_init_wip(); + if (retval) + return retval; + + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_enable); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_sched_waking); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_disable); + + return 0; + } + +The probes then need to be detached at the disable phase. + +[1] The wip model is presented in:: + + Documentation/trace/rv/deterministic_automata.rst + +The wip monitor is presented in:: + + Documentation/trace/rv/da_monitor_synthesis.rst diff --git a/Documentation/trace/rv/da_monitor_synthesis.rst b/Documentation/trace/rv/da_monitor_synthesis.rst new file mode 100644 index 000000000000..0dbdcd1e62b9 --- /dev/null +++ b/Documentation/trace/rv/da_monitor_synthesis.rst @@ -0,0 +1,147 @@ +Deterministic Automata Monitor Synthesis +======================================== + +The starting point for the application of runtime verification (RV) technics +is the *specification* or *modeling* of the desired (or undesired) behavior +of the system under scrutiny. + +The formal representation needs to be then *synthesized* into a *monitor* +that can then be used in the analysis of the trace of the system. The +*monitor* connects to the system via an *instrumentation* that converts +the events from the *system* to the events of the *specification*. + + +In Linux terms, the runtime verification monitors are encapsulated inside +the *RV monitor* abstraction. The RV monitor includes a set of instances +of the monitor (per-cpu monitor, per-task monitor, and so on), the helper +functions that glue the monitor to the system reference model, and the +trace output as a reaction to event parsing and exceptions, as depicted +below:: + + Linux +----- RV Monitor ----------------------------------+ Formal + Realm | | Realm + +-------------------+ +----------------+ +-----------------+ + | Linux kernel | | Monitor | | Reference | + | Tracing | -> | Instance(s) | <- | Model | + | (instrumentation) | | (verification) | | (specification) | + +-------------------+ +----------------+ +-----------------+ + | | | + | V | + | +----------+ | + | | Reaction | | + | +--+--+--+-+ | + | | | | | + | | | +-> trace output ? | + +------------------------|--|----------------------+ + | +----> panic ? + +-------> <user-specified> + +DA monitor synthesis +-------------------- + +The synthesis of automata-based models into the Linux *RV monitor* abstraction +is automated by the dot2k tool and the rv/da_monitor.h header file that +contains a set of macros that automatically generate the monitor's code. + +dot2k +----- + +The dot2k utility leverages dot2c by converting an automaton model in +the DOT format into the C representation [1] and creating the skeleton of +a kernel monitor in C. + +For example, it is possible to transform the wip.dot model present in +[1] into a per-cpu monitor with the following command:: + + $ dot2k -d wip.dot -t per_cpu + +This will create a directory named wip/ with the following files: + +- wip.h: the wip model in C +- wip.c: the RV monitor + +The wip.c file contains the monitor declaration and the starting point for +the system instrumentation. + +Monitor macros +-------------- + +The rv/da_monitor.h enables automatic code generation for the *Monitor +Instance(s)* using C macros. + +The benefits of the usage of macro for monitor synthesis are 3-fold as it: + +- Reduces the code duplication; +- Facilitates the bug fix/improvement; +- Avoids the case of developers changing the core of the monitor code + to manipulate the model in a (let's say) non-standard way. + +This initial implementation presents three different types of monitor instances: + +- ``#define DECLARE_DA_MON_GLOBAL(name, type)`` +- ``#define DECLARE_DA_MON_PER_CPU(name, type)`` +- ``#define DECLARE_DA_MON_PER_TASK(name, type)`` + +The first declares the functions for a global deterministic automata monitor, +the second for monitors with per-cpu instances, and the third with per-task +instances. + +In all cases, the 'name' argument is a string that identifies the monitor, and +the 'type' argument is the data type used by dot2k on the representation of +the model in C. + +For example, the wip model with two states and three events can be +stored in an 'unsigned char' type. Considering that the preemption control +is a per-cpu behavior, the monitor declaration in the 'wip.c' file is:: + + DECLARE_DA_MON_PER_CPU(wip, unsigned char); + +The monitor is executed by sending events to be processed via the functions +presented below:: + + da_handle_event_$(MONITOR_NAME)($(event from event enum)); + da_handle_start_event_$(MONITOR_NAME)($(event from event enum)); + da_handle_start_run_event_$(MONITOR_NAME)($(event from event enum)); + +The function ``da_handle_event_$(MONITOR_NAME)()`` is the regular case where +the event will be processed if the monitor is processing events. + +When a monitor is enabled, it is placed in the initial state of the automata. +However, the monitor does not know if the system is in the *initial state*. + +The ``da_handle_start_event_$(MONITOR_NAME)()`` function is used to notify the +monitor that the system is returning to the initial state, so the monitor can +start monitoring the next event. + +The ``da_handle_start_run_event_$(MONITOR_NAME)()`` function is used to notify +the monitor that the system is known to be in the initial state, so the +monitor can start monitoring and monitor the current event. + +Using the wip model as example, the events "preempt_disable" and +"sched_waking" should be sent to monitor, respectively, via [2]:: + + da_handle_event_wip(preempt_disable_wip); + da_handle_event_wip(sched_waking_wip); + +While the event "preempt_enabled" will use:: + + da_handle_start_event_wip(preempt_enable_wip); + +To notify the monitor that the system will be returning to the initial state, +so the system and the monitor should be in sync. + +Final remarks +------------- + +With the monitor synthesis in place using the rv/da_monitor.h and +dot2k, the developer's work should be limited to the instrumentation +of the system, increasing the confidence in the overall approach. + +[1] For details about deterministic automata format and the translation +from one representation to another, see:: + + Documentation/trace/rv/deterministic_automata.rst + +[2] dot2k appends the monitor's name suffix to the events enums to +avoid conflicting variables when exporting the global vmlinux.h +use by BPF programs. diff --git a/Documentation/trace/rv/deterministic_automata.rst b/Documentation/trace/rv/deterministic_automata.rst new file mode 100644 index 000000000000..d0638f95a455 --- /dev/null +++ b/Documentation/trace/rv/deterministic_automata.rst @@ -0,0 +1,184 @@ +Deterministic Automata +====================== + +Formally, a deterministic automaton, denoted by G, is defined as a quintuple: + + *G* = { *X*, *E*, *f*, x\ :subscript:`0`, X\ :subscript:`m` } + +where: + +- *X* is the set of states; +- *E* is the finite set of events; +- x\ :subscript:`0` is the initial state; +- X\ :subscript:`m` (subset of *X*) is the set of marked (or final) states. +- *f* : *X* x *E* -> *X* $ is the transition function. It defines the state + transition in the occurrence of an event from *E* in the state *X*. In the + special case of deterministic automata, the occurrence of the event in *E* + in a state in *X* has a deterministic next state from *X*. + +For example, a given automaton named 'wip' (wakeup in preemptive) can +be defined as: + +- *X* = { ``preemptive``, ``non_preemptive``} +- *E* = { ``preempt_enable``, ``preempt_disable``, ``sched_waking``} +- x\ :subscript:`0` = ``preemptive`` +- X\ :subscript:`m` = {``preemptive``} +- *f* = + - *f*\ (``preemptive``, ``preempt_disable``) = ``non_preemptive`` + - *f*\ (``non_preemptive``, ``sched_waking``) = ``non_preemptive`` + - *f*\ (``non_preemptive``, ``preempt_enable``) = ``preemptive`` + +One of the benefits of this formal definition is that it can be presented +in multiple formats. For example, using a *graphical representation*, using +vertices (nodes) and edges, which is very intuitive for *operating system* +practitioners, without any loss. + +The previous 'wip' automaton can also be represented as:: + + preempt_enable + +---------------------------------+ + v | + #============# preempt_disable +------------------+ + --> H preemptive H -----------------> | non_preemptive | + #============# +------------------+ + ^ | + | sched_waking | + +--------------+ + +Deterministic Automaton in C +---------------------------- + +In the paper "Efficient formal verification for the Linux kernel", +the authors present a simple way to represent an automaton in C that can +be used as regular code in the Linux kernel. + +For example, the 'wip' automata can be presented as (augmented with comments):: + + /* enum representation of X (set of states) to be used as index */ + enum states { + preemptive = 0, + non_preemptive, + state_max + }; + + #define INVALID_STATE state_max + + /* enum representation of E (set of events) to be used as index */ + enum events { + preempt_disable = 0, + preempt_enable, + sched_waking, + event_max + }; + + struct automaton { + char *state_names[state_max]; // X: the set of states + char *event_names[event_max]; // E: the finite set of events + unsigned char function[state_max][event_max]; // f: transition function + unsigned char initial_state; // x_0: the initial state + bool final_states[state_max]; // X_m: the set of marked states + }; + + struct automaton aut = { + .state_names = { + "preemptive", + "non_preemptive" + }, + .event_names = { + "preempt_disable", + "preempt_enable", + "sched_waking" + }, + .function = { + { non_preemptive, INVALID_STATE, INVALID_STATE }, + { INVALID_STATE, preemptive, non_preemptive }, + }, + .initial_state = preemptive, + .final_states = { 1, 0 }, + }; + +The *transition function* is represented as a matrix of states (lines) and +events (columns), and so the function *f* : *X* x *E* -> *X* can be solved +in O(1). For example:: + + next_state = automaton_wip.function[curr_state][event]; + +Graphviz .dot format +-------------------- + +The Graphviz open-source tool can produce the graphical representation +of an automaton using the (textual) DOT language as the source code. +The DOT format is widely used and can be converted to many other formats. + +For example, this is the 'wip' model in DOT:: + + digraph state_automaton { + {node [shape = circle] "non_preemptive"}; + {node [shape = plaintext, style=invis, label=""] "__init_preemptive"}; + {node [shape = doublecircle] "preemptive"}; + {node [shape = circle] "preemptive"}; + "__init_preemptive" -> "preemptive"; + "non_preemptive" [label = "non_preemptive"]; + "non_preemptive" -> "non_preemptive" [ label = "sched_waking" ]; + "non_preemptive" -> "preemptive" [ label = "preempt_enable" ]; + "preemptive" [label = "preemptive"]; + "preemptive" -> "non_preemptive" [ label = "preempt_disable" ]; + { rank = min ; + "__init_preemptive"; + "preemptive"; + } + } + +This DOT format can be transformed into a bitmap or vectorial image +using the dot utility, or into an ASCII art using graph-easy. For +instance:: + + $ dot -Tsvg -o wip.svg wip.dot + $ graph-easy wip.dot > wip.txt + +dot2c +----- + +dot2c is a utility that can parse a .dot file containing an automaton as +in the example above and automatically convert it to the C representation +presented in [3]. + +For example, having the previous 'wip' model into a file named 'wip.dot', +the following command will transform the .dot file into the C +representation (previously shown) in the 'wip.h' file:: + + $ dot2c wip.dot > wip.h + +The 'wip.h' content is the code sample in section 'Deterministic Automaton +in C'. + +Remarks +------- + +The automata formalism allows modeling discrete event systems (DES) in +multiple formats, suitable for different applications/users. + +For example, the formal description using set theory is better suitable +for automata operations, while the graphical format for human interpretation; +and computer languages for machine execution. + +References +---------- + +Many textbooks cover automata formalism. For a brief introduction see:: + + O'Regan, Gerard. Concise guide to software engineering. Springer, + Cham, 2017. + +For a detailed description, including operations, and application on Discrete +Event Systems (DES), see:: + + Cassandras, Christos G., and Stephane Lafortune, eds. Introduction to discrete + event systems. Boston, MA: Springer US, 2008. + +For the C representation in kernel, see:: + + De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo + Silva. Efficient formal verification for the Linux kernel. In: + International Conference on Software Engineering and Formal Methods. + Springer, Cham, 2019. p. 315-332. diff --git a/Documentation/trace/rv/index.rst b/Documentation/trace/rv/index.rst new file mode 100644 index 000000000000..15fa966102c0 --- /dev/null +++ b/Documentation/trace/rv/index.rst @@ -0,0 +1,14 @@ +==================== +Runtime Verification +==================== + +.. toctree:: + :maxdepth: 2 + :glob: + + runtime-verification.rst + deterministic_automata.rst + da_monitor_synthesis.rst + da_monitor_instrumentation.rst + monitor_wip.rst + monitor_wwnr.rst diff --git a/Documentation/trace/rv/monitor_wip.rst b/Documentation/trace/rv/monitor_wip.rst new file mode 100644 index 000000000000..a95763438c48 --- /dev/null +++ b/Documentation/trace/rv/monitor_wip.rst @@ -0,0 +1,55 @@ +Monitor wip +=========== + +- Name: wip - wakeup in preemptive +- Type: per-cpu deterministic automaton +- Author: Daniel Bristot de Oliveira <bristot@kernel.org> + +Description +----------- + +The wakeup in preemptive (wip) monitor is a sample per-cpu monitor +that verifies if the wakeup events always take place with +preemption disabled:: + + | + | + v + #==================# + H preemptive H <+ + #==================# | + | | + | preempt_disable | preempt_enable + v | + sched_waking +------------------+ | + +--------------- | | | + | | non_preemptive | | + +--------------> | | -+ + +------------------+ + +The wakeup event always takes place with preemption disabled because +of the scheduler synchronization. However, because the preempt_count +and its trace event are not atomic with regard to interrupts, some +inconsistencies might happen. For example:: + + preempt_disable() { + __preempt_count_add(1) + -------> smp_apic_timer_interrupt() { + preempt_disable() + do not trace (preempt count >= 1) + + wake up a thread + + preempt_enable() + do not trace (preempt count >= 1) + } + <------ + trace_preempt_disable(); + } + +This problem was reported and discussed here: + https://lore.kernel.org/r/cover.1559051152.git.bristot@redhat.com/ + +Specification +------------- +Grapviz Dot file in tools/verification/models/wip.dot diff --git a/Documentation/trace/rv/monitor_wwnr.rst b/Documentation/trace/rv/monitor_wwnr.rst new file mode 100644 index 000000000000..80f1777b85aa --- /dev/null +++ b/Documentation/trace/rv/monitor_wwnr.rst @@ -0,0 +1,45 @@ +Monitor wwnr +============ + +- Name: wwrn - wakeup while not running +- Type: per-task deterministic automaton +- Author: Daniel Bristot de Oliveira <bristot@kernel.org> + +Description +----------- + +This is a per-task sample monitor, with the following +definition:: + + | + | + v + wakeup +-------------+ + +--------- | | + | | not_running | + +--------> | | <+ + +-------------+ | + | | + | switch_in | switch_out + v | + +-------------+ | + | running | -+ + +-------------+ + +This model is borken, the reason is that a task can be running +in the processor without being set as RUNNABLE. Think about a +task about to sleep:: + + 1: set_current_state(TASK_UNINTERRUPTIBLE); + 2: schedule(); + +And then imagine an IRQ happening in between the lines one and two, +waking the task up. BOOM, the wakeup will happen while the task is +running. + +- Why do we need this model, so? +- To test the reactors. + +Specification +------------- +Grapviz Dot file in tools/verification/models/wwnr.dot diff --git a/Documentation/trace/rv/runtime-verification.rst b/Documentation/trace/rv/runtime-verification.rst new file mode 100644 index 000000000000..c46b6149470e --- /dev/null +++ b/Documentation/trace/rv/runtime-verification.rst @@ -0,0 +1,231 @@ +==================== +Runtime Verification +==================== + +Runtime Verification (RV) is a lightweight (yet rigorous) method that +complements classical exhaustive verification techniques (such as *model +checking* and *theorem proving*) with a more practical approach for complex +systems. + +Instead of relying on a fine-grained model of a system (e.g., a +re-implementation a instruction level), RV works by analyzing the trace of the +system's actual execution, comparing it against a formal specification of +the system behavior. + +The main advantage is that RV can give precise information on the runtime +behavior of the monitored system, without the pitfalls of developing models +that require a re-implementation of the entire system in a modeling language. +Moreover, given an efficient monitoring method, it is possible execute an +*online* verification of a system, enabling the *reaction* for unexpected +events, avoiding, for example, the propagation of a failure on safety-critical +systems. + +Runtime Monitors and Reactors +============================= + +A monitor is the central part of the runtime verification of a system. The +monitor stands in between the formal specification of the desired (or +undesired) behavior, and the trace of the actual system. + +In Linux terms, the runtime verification monitors are encapsulated inside the +*RV monitor* abstraction. A *RV monitor* includes a reference model of the +system, a set of instances of the monitor (per-cpu monitor, per-task monitor, +and so on), and the helper functions that glue the monitor to the system via +trace, as depicted bellow:: + + Linux +---- RV Monitor ----------------------------------+ Formal + Realm | | Realm + +-------------------+ +----------------+ +-----------------+ + | Linux kernel | | Monitor | | Reference | + | Tracing | -> | Instance(s) | <- | Model | + | (instrumentation) | | (verification) | | (specification) | + +-------------------+ +----------------+ +-----------------+ + | | | + | V | + | +----------+ | + | | Reaction | | + | +--+--+--+-+ | + | | | | | + | | | +-> trace output ? | + +------------------------|--|----------------------+ + | +----> panic ? + +-------> <user-specified> + +In addition to the verification and monitoring of the system, a monitor can +react to an unexpected event. The forms of reaction can vary from logging the +event occurrence to the enforcement of the correct behavior to the extreme +action of taking a system down to avoid the propagation of a failure. + +In Linux terms, a *reactor* is an reaction method available for *RV monitors*. +By default, all monitors should provide a trace output of their actions, +which is already a reaction. In addition, other reactions will be available +so the user can enable them as needed. + +For further information about the principles of runtime verification and +RV applied to Linux: + + Bartocci, Ezio, et al. *Introduction to runtime verification.* In: Lectures on + Runtime Verification. Springer, Cham, 2018. p. 1-33. + + Falcone, Ylies, et al. *A taxonomy for classifying runtime verification tools.* + In: International Conference on Runtime Verification. Springer, Cham, 2018. p. + 241-262. + + De Oliveira, Daniel Bristot. *Automata-based formal analysis and + verification of the real-time Linux kernel.* Ph.D. Thesis, 2020. + +Online RV monitors +================== + +Monitors can be classified as *offline* and *online* monitors. *Offline* +monitor process the traces generated by a system after the events, generally by +reading the trace execution from a permanent storage system. *Online* monitors +process the trace during the execution of the system. Online monitors are said +to be *synchronous* if the processing of an event is attached to the system +execution, blocking the system during the event monitoring. On the other hand, +an *asynchronous* monitor has its execution detached from the system. Each type +of monitor has a set of advantages. For example, *offline* monitors can be +executed on different machines but require operations to save the log to a +file. In contrast, *synchronous online* method can react at the exact moment +a violation occurs. + +Another important aspect regarding monitors is the overhead associated with the +event analysis. If the system generates events at a frequency higher than the +monitor's ability to process them in the same system, only the *offline* +methods are viable. On the other hand, if the tracing of the events incurs +on higher overhead than the simple handling of an event by a monitor, then a +*synchronous online* monitors will incur on lower overhead. + +Indeed, the research presented in: + + De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo Silva. + *Efficient formal verification for the Linux kernel.* In: International + Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. + p. 315-332. + +Shows that for Deterministic Automata models, the synchronous processing of +events in-kernel causes lower overhead than saving the same events to the trace +buffer, not even considering collecting the trace for user-space analysis. +This motivated the development of an in-kernel interface for online monitors. + +For further information about modeling of Linux kernel behavior using automata, +see: + + De Oliveira, Daniel B.; De Oliveira, Romulo S.; Cucinotta, Tommaso. *A thread + synchronization model for the PREEMPT_RT Linux kernel.* Journal of Systems + Architecture, 2020, 107: 101729. + +The user interface +================== + +The user interface resembles the tracing interface (on purpose). It is +currently at "/sys/kernel/tracing/rv/". + +The following files/folders are currently available: + +**available_monitors** + +- Reading list the available monitors, one per line + +For example:: + + # cat available_monitors + wip + wwnr + +**available_reactors** + +- Reading shows the available reactors, one per line. + +For example:: + + # cat available_reactors + nop + panic + printk + +**enabled_monitors**: + +- Reading lists the enabled monitors, one per line +- Writing to it enables a given monitor +- Writing a monitor name with a '!' prefix disables it +- Truncating the file disables all enabled monitors + +For example:: + + # cat enabled_monitors + # echo wip > enabled_monitors + # echo wwnr >> enabled_monitors + # cat enabled_monitors + wip + wwnr + # echo '!wip' >> enabled_monitors + # cat enabled_monitors + wwnr + # echo > enabled_monitors + # cat enabled_monitors + # + +Note that it is possible to enable more than one monitor concurrently. + +**monitoring_on** + +This is an on/off general switcher for monitoring. It resembles the +"tracing_on" switcher in the trace interface. + +- Writing "0" stops the monitoring +- Writing "1" continues the monitoring +- Reading returns the current status of the monitoring + +Note that it does not disable enabled monitors but stop the per-entity +monitors monitoring the events received from the system. + +**reacting_on** + +- Writing "0" prevents reactions for happening +- Writing "1" enable reactions +- Reading returns the current status of the reaction + +**monitors/** + +Each monitor will have its own directory inside "monitors/". There the +monitor-specific files will be presented. The "monitors/" directory resembles +the "events" directory on tracefs. + +For example:: + + # cd monitors/wip/ + # ls + desc enable + # cat desc + wakeup in preemptive per-cpu testing monitor. + # cat enable + 0 + +**monitors/MONITOR/desc** + +- Reading shows a description of the monitor *MONITOR* + +**monitors/MONITOR/enable** + +- Writing "0" disables the *MONITOR* +- Writing "1" enables the *MONITOR* +- Reading return the current status of the *MONITOR* + +**monitors/MONITOR/reactors** + +- List available reactors, with the select reaction for the given *MONITOR* + inside "[]". The default one is the nop (no operation) reactor. +- Writing the name of a reactor enables it to the given MONITOR. + +For example:: + + # cat monitors/wip/reactors + [nop] + panic + printk + # echo panic > monitors/wip/reactors + # cat monitors/wip/reactors + nop + [panic] + printk diff --git a/Documentation/trace/uprobetracer.rst b/Documentation/trace/uprobetracer.rst index a8e5938f609e..3a1797d707f4 100644 --- a/Documentation/trace/uprobetracer.rst +++ b/Documentation/trace/uprobetracer.rst @@ -26,10 +26,10 @@ Synopsis of uprobe_tracer ------------------------- :: - p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a uprobe - r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe) - p[:[GRP/]EVENT] PATH:OFFSET%return [FETCHARGS] : Set a return uprobe (uretprobe) - -:[GRP/]EVENT : Clear uprobe or uretprobe event + p[:[GRP/][EVENT]] PATH:OFFSET [FETCHARGS] : Set a uprobe + r[:[GRP/][EVENT]] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe) + p[:[GRP/][EVENT]] PATH:OFFSET%return [FETCHARGS] : Set a return uprobe (uretprobe) + -:[GRP/][EVENT] : Clear uprobe or uretprobe event GRP : Group name. If omitted, "uprobes" is the default value. EVENT : Event name. If omitted, the event name is generated based diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 24b9fa89aa27..bd165004776d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -91,6 +91,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) /* Make sure it is what we expect it to be */ if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) { + ftrace_expected = old_code; WARN_ON(1); return -EINVAL; } diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index 707f1053f0b7..582b6f68df3d 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -26,14 +26,10 @@ DECLARE_EVENT_CLASS(hfi1_trace_template, TP_PROTO(const char *function, struct va_format *vaf), TP_ARGS(function, vaf), TP_STRUCT__entry(__string(function, function) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign(__assign_str(function, function); - WARN_ON_ONCE(vsnprintf - (__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= - MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("(%s) %s", __get_str(function), diff --git a/drivers/net/wireless/ath/ath10k/trace.h b/drivers/net/wireless/ath/ath10k/trace.h index 4714c86bb501..64e7a767d963 100644 --- a/drivers/net/wireless/ath/ath10k/trace.h +++ b/drivers/net/wireless/ath/ath10k/trace.h @@ -52,15 +52,12 @@ DECLARE_EVENT_CLASS(ath10k_log_event, TP_STRUCT__entry( __string(device, dev_name(ar->dev)) __string(driver, dev_driver_string(ar->dev)) - __dynamic_array(char, msg, ATH10K_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, dev_name(ar->dev)); __assign_str(driver, dev_driver_string(ar->dev)); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH10K_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH10K_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( "%s %s %s", @@ -92,16 +89,13 @@ TRACE_EVENT(ath10k_log_dbg, __string(device, dev_name(ar->dev)) __string(driver, dev_driver_string(ar->dev)) __field(unsigned int, level) - __dynamic_array(char, msg, ATH10K_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, dev_name(ar->dev)); __assign_str(driver, dev_driver_string(ar->dev)); __entry->level = level; - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH10K_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH10K_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( "%s %s %s", diff --git a/drivers/net/wireless/ath/ath11k/trace.h b/drivers/net/wireless/ath/ath11k/trace.h index a02e54735e88..76560587bea0 100644 --- a/drivers/net/wireless/ath/ath11k/trace.h +++ b/drivers/net/wireless/ath/ath11k/trace.h @@ -126,15 +126,12 @@ DECLARE_EVENT_CLASS(ath11k_log_event, TP_STRUCT__entry( __string(device, dev_name(ab->dev)) __string(driver, dev_driver_string(ab->dev)) - __dynamic_array(char, msg, ATH11K_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, dev_name(ab->dev)); __assign_str(driver, dev_driver_string(ab->dev)); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH11K_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH11K_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( "%s %s %s", diff --git a/drivers/net/wireless/ath/ath6kl/trace.h b/drivers/net/wireless/ath/ath6kl/trace.h index a3d3740419eb..231a94769ddb 100644 --- a/drivers/net/wireless/ath/ath6kl/trace.h +++ b/drivers/net/wireless/ath/ath6kl/trace.h @@ -253,13 +253,10 @@ DECLARE_EVENT_CLASS(ath6kl_log_event, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, ATH6KL_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH6KL_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH6KL_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); @@ -284,14 +281,11 @@ TRACE_EVENT(ath6kl_log_dbg, TP_ARGS(level, vaf), TP_STRUCT__entry( __field(unsigned int, level) - __dynamic_array(char, msg, ATH6KL_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __entry->level = level; - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH6KL_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH6KL_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); diff --git a/drivers/net/wireless/ath/trace.h b/drivers/net/wireless/ath/trace.h index ba711644d27e..9935cf475b6d 100644 --- a/drivers/net/wireless/ath/trace.h +++ b/drivers/net/wireless/ath/trace.h @@ -40,16 +40,13 @@ TRACE_EVENT(ath_log, TP_STRUCT__entry( __string(device, wiphy_name(wiphy)) __string(driver, KBUILD_MODNAME) - __dynamic_array(char, msg, ATH_DBG_MAX_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, wiphy_name(wiphy)); __assign_str(driver, KBUILD_MODNAME); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH_DBG_MAX_LEN, - vaf->fmt, - *vaf->va) >= ATH_DBG_MAX_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( diff --git a/drivers/net/wireless/ath/wil6210/trace.h b/drivers/net/wireless/ath/wil6210/trace.h index 11c989e95880..201f44612c31 100644 --- a/drivers/net/wireless/ath/wil6210/trace.h +++ b/drivers/net/wireless/ath/wil6210/trace.h @@ -70,13 +70,10 @@ DECLARE_EVENT_CLASS(wil6210_log_event, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, WIL6210_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - WIL6210_MSG_MAX, - vaf->fmt, - *vaf->va) >= WIL6210_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.h index 338c66d0c5f8..5a139d7ed47a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.h @@ -33,13 +33,11 @@ TRACE_EVENT(brcmf_err, TP_ARGS(func, vaf), TP_STRUCT__entry( __string(func, func) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(func, func); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(func), __get_str(msg)) ); @@ -50,14 +48,12 @@ TRACE_EVENT(brcmf_dbg, TP_STRUCT__entry( __field(u32, level) __string(func, func) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __entry->level = level; __assign_str(func, func); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(func), __get_str(msg)) ); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h index 0e8a69ab909f..488456420353 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h @@ -28,12 +28,10 @@ DECLARE_EVENT_CLASS(brcms_msg_event, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); @@ -64,14 +62,12 @@ TRACE_EVENT(brcms_dbg, TP_STRUCT__entry( __field(u32, level) __string(func, func) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __entry->level = level; __assign_str(func, func); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(func), __get_str(msg)) ); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-msg.h b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-msg.h index 7dd70011fd1e..1d6c292cf545 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-msg.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-msg.h @@ -18,12 +18,10 @@ DECLARE_EVENT_CLASS(iwlwifi_msg_event, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); @@ -55,14 +53,12 @@ TRACE_EVENT(iwlwifi_dbg, TP_STRUCT__entry( __field(u32, level) __string(function, function) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __entry->level = level; __assign_str(function, function); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); diff --git a/drivers/usb/chipidea/trace.h b/drivers/usb/chipidea/trace.h index 1601fd86c4c1..ca0e65b48f0a 100644 --- a/drivers/usb/chipidea/trace.h +++ b/drivers/usb/chipidea/trace.h @@ -28,11 +28,11 @@ TRACE_EVENT(ci_log, TP_ARGS(ci, vaf), TP_STRUCT__entry( __string(name, dev_name(ci->dev)) - __dynamic_array(char, msg, CHIPIDEA_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(name, dev_name(ci->dev)); - vsnprintf(__get_str(msg), CHIPIDEA_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(name), __get_str(msg)) ); diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index a5da02077297..61e93a3540a7 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -28,9 +28,9 @@ DECLARE_EVENT_CLASS(xhci_log_msg, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), - TP_STRUCT__entry(__dynamic_array(char, msg, XHCI_MSG_MAX)), + TP_STRUCT__entry(__vstring(msg, vaf->fmt, vaf->va)), TP_fast_assign( - vsnprintf(__get_str(msg), XHCI_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); diff --git a/drivers/usb/mtu3/mtu3_trace.h b/drivers/usb/mtu3/mtu3_trace.h index a09deae1146f..03d2a9bac27e 100644 --- a/drivers/usb/mtu3/mtu3_trace.h +++ b/drivers/usb/mtu3/mtu3_trace.h @@ -18,18 +18,16 @@ #include "mtu3.h" -#define MTU3_MSG_MAX 256 - TRACE_EVENT(mtu3_log, TP_PROTO(struct device *dev, struct va_format *vaf), TP_ARGS(dev, vaf), TP_STRUCT__entry( __string(name, dev_name(dev)) - __dynamic_array(char, msg, MTU3_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(name, dev_name(dev)); - vsnprintf(__get_str(msg), MTU3_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(name), __get_str(msg)) ); diff --git a/drivers/usb/musb/musb_trace.h b/drivers/usb/musb/musb_trace.h index ec28b5716796..f246b14394c4 100644 --- a/drivers/usb/musb/musb_trace.h +++ b/drivers/usb/musb/musb_trace.h @@ -28,11 +28,11 @@ TRACE_EVENT(musb_log, TP_ARGS(musb, vaf), TP_STRUCT__entry( __string(name, dev_name(musb->controller)) - __dynamic_array(char, msg, MUSB_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(name, dev_name(musb->controller)); - vsnprintf(__get_str(msg), MUSB_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(name), __get_str(msg)) ); diff --git a/include/linux/rv.h b/include/linux/rv.h new file mode 100644 index 000000000000..8883b41d88ec --- /dev/null +++ b/include/linux/rv.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Runtime Verification. + * + * For futher information, see: kernel/trace/rv/rv.c. + */ +#ifndef _LINUX_RV_H +#define _LINUX_RV_H + +#define MAX_DA_NAME_LEN 24 + +#ifdef CONFIG_RV +/* + * Deterministic automaton per-object variables. + */ +struct da_monitor { + bool monitoring; + unsigned int curr_state; +}; + +/* + * Per-task RV monitors count. Nowadays fixed in RV_PER_TASK_MONITORS. + * If we find justification for more monitors, we can think about + * adding more or developing a dynamic method. So far, none of + * these are justified. + */ +#define RV_PER_TASK_MONITORS 1 +#define RV_PER_TASK_MONITOR_INIT (RV_PER_TASK_MONITORS) + +/* + * Futher monitor types are expected, so make this a union. + */ +union rv_task_monitor { + struct da_monitor da_mon; +}; + +#ifdef CONFIG_RV_REACTORS +struct rv_reactor { + const char *name; + const char *description; + void (*react)(char *msg); +}; +#endif + +struct rv_monitor { + const char *name; + const char *description; + bool enabled; + int (*enable)(void); + void (*disable)(void); + void (*reset)(void); +#ifdef CONFIG_RV_REACTORS + void (*react)(char *msg); +#endif +}; + +bool rv_monitoring_on(void); +int rv_unregister_monitor(struct rv_monitor *monitor); +int rv_register_monitor(struct rv_monitor *monitor); +int rv_get_task_monitor_slot(void); +void rv_put_task_monitor_slot(int slot); + +#ifdef CONFIG_RV_REACTORS +bool rv_reacting_on(void); +int rv_unregister_reactor(struct rv_reactor *reactor); +int rv_register_reactor(struct rv_reactor *reactor); +#endif /* CONFIG_RV_REACTORS */ + +#endif /* CONFIG_RV */ +#endif /* _LINUX_RV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d6b0866c71ed..0bbfee0bb028 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include <linux/rseq.h> #include <linux/seqlock.h> #include <linux/kcsan.h> +#include <linux/rv.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ @@ -1501,6 +1502,16 @@ struct task_struct { struct callback_head l1d_flush_kill; #endif +#ifdef CONFIG_RV + /* + * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS. + * If we find justification for more monitors, we can think + * about adding more or developing a dynamic method. So far, + * none of these are justified. + */ + union rv_task_monitor rv[RV_PER_TASK_MONITORS]; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index e6e95a9f07a5..b18759a673c6 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -916,6 +916,24 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, #endif +#define TRACE_EVENT_STR_MAX 512 + +/* + * gcc warns that you can not use a va_list in an inlined + * function. But lets me make it into a macro :-/ + */ +#define __trace_event_vstr_len(fmt, va) \ +({ \ + va_list __ap; \ + int __ret; \ + \ + va_copy(__ap, *(va)); \ + __ret = vsnprintf(NULL, 0, fmt, __ap) + 1; \ + va_end(__ap); \ + \ + min(__ret, TRACE_EVENT_STR_MAX); \ +}) + #endif /* _LINUX_TRACE_EVENT_H */ /* diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 55717a2eda08..4b33b95eb8be 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -151,7 +151,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) /* * Individual subsystem my have a separate configuration to * enable their tracepoints. By default, this file will create - * the tracepoints if CONFIG_TRACEPOINT is defined. If a subsystem + * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem * wants to be able to disable its tracepoints from being created * it can define NOTRACE before including the tracepoint headers. */ diff --git a/include/rv/automata.h b/include/rv/automata.h new file mode 100644 index 000000000000..eb9e636809a0 --- /dev/null +++ b/include/rv/automata.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> + * + * Deterministic automata helper functions, to be used with the automata + * models in C generated by the dot2k tool. + */ + +/* + * DECLARE_AUTOMATA_HELPERS - define a set of helper functions for automata + * + * Define a set of helper functions for automata. The 'name' argument is used + * as suffix for the functions and data. These functions will handle automaton + * with data type 'type'. + */ +#define DECLARE_AUTOMATA_HELPERS(name, type) \ + \ +/* \ + * model_get_state_name_##name - return the (string) name of the given state \ + */ \ +static char *model_get_state_name_##name(enum states_##name state) \ +{ \ + if ((state < 0) || (state >= state_max_##name)) \ + return "INVALID"; \ + \ + return automaton_##name.state_names[state]; \ +} \ + \ +/* \ + * model_get_event_name_##name - return the (string) name of the given event \ + */ \ +static char *model_get_event_name_##name(enum events_##name event) \ +{ \ + if ((event < 0) || (event >= event_max_##name)) \ + return "INVALID"; \ + \ + return automaton_##name.event_names[event]; \ +} \ + \ +/* \ + * model_get_initial_state_##name - return the automaton's initial state \ + */ \ +static inline type model_get_initial_state_##name(void) \ +{ \ + return automaton_##name.initial_state; \ +} \ + \ +/* \ + * model_get_next_state_##name - process an automaton event occurrence \ + * \ + * Given the current state (curr_state) and the event (event), returns \ + * the next state, or INVALID_STATE in case of error. \ + */ \ +static inline type model_get_next_state_##name(enum states_##name curr_state, \ + enum events_##name event) \ +{ \ + if ((curr_state < 0) || (curr_state >= state_max_##name)) \ + return INVALID_STATE; \ + \ + if ((event < 0) || (event >= event_max_##name)) \ + return INVALID_STATE; \ + \ + return automaton_##name.function[curr_state][event]; \ +} \ + \ +/* \ + * model_is_final_state_##name - check if the given state is a final state \ + */ \ +static inline bool model_is_final_state_##name(enum states_##name state) \ +{ \ + if ((state < 0) || (state >= state_max_##name)) \ + return 0; \ + \ + return automaton_##name.final_states[state]; \ +} diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h new file mode 100644 index 000000000000..9eb75683e012 --- /dev/null +++ b/include/rv/da_monitor.h @@ -0,0 +1,544 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> + * + * Deterministic automata (DA) monitor functions, to be used together + * with automata models in C generated by the dot2k tool. + * + * The dot2k tool is available at tools/verification/dot2k/ + * + * For further information, see: + * Documentation/trace/rv/da_monitor_synthesis.rst + */ + +#include <rv/automata.h> +#include <linux/rv.h> +#include <linux/bug.h> + +#ifdef CONFIG_RV_REACTORS + +#define DECLARE_RV_REACTING_HELPERS(name, type) \ +static char REACT_MSG_##name[1024]; \ + \ +static inline char *format_react_msg_##name(type curr_state, type event) \ +{ \ + snprintf(REACT_MSG_##name, 1024, \ + "rv: monitor %s does not allow event %s on state %s\n", \ + #name, \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(curr_state)); \ + return REACT_MSG_##name; \ +} \ + \ +static void cond_react_##name(char *msg) \ +{ \ + if (rv_##name.react) \ + rv_##name.react(msg); \ +} \ + \ +static bool rv_reacting_on_##name(void) \ +{ \ + return rv_reacting_on(); \ +} + +#else /* CONFIG_RV_REACTOR */ + +#define DECLARE_RV_REACTING_HELPERS(name, type) \ +static inline char *format_react_msg_##name(type curr_state, type event) \ +{ \ + return NULL; \ +} \ + \ +static void cond_react_##name(char *msg) \ +{ \ + return; \ +} \ + \ +static bool rv_reacting_on_##name(void) \ +{ \ + return 0; \ +} +#endif + +/* + * Generic helpers for all types of deterministic automata monitors. + */ +#define DECLARE_DA_MON_GENERIC_HELPERS(name, type) \ + \ +DECLARE_RV_REACTING_HELPERS(name, type) \ + \ +/* \ + * da_monitor_reset_##name - reset a monitor and setting it to init state \ + */ \ +static inline void da_monitor_reset_##name(struct da_monitor *da_mon) \ +{ \ + da_mon->monitoring = 0; \ + da_mon->curr_state = model_get_initial_state_##name(); \ +} \ + \ +/* \ + * da_monitor_curr_state_##name - return the current state \ + */ \ +static inline type da_monitor_curr_state_##name(struct da_monitor *da_mon) \ +{ \ + return da_mon->curr_state; \ +} \ + \ +/* \ + * da_monitor_set_state_##name - set the new current state \ + */ \ +static inline void \ +da_monitor_set_state_##name(struct da_monitor *da_mon, enum states_##name state) \ +{ \ + da_mon->curr_state = state; \ +} \ + \ +/* \ + * da_monitor_start_##name - start monitoring \ + * \ + * The monitor will ignore all events until monitoring is set to true. This \ + * function needs to be called to tell the monitor to start monitoring. \ + */ \ +static inline void da_monitor_start_##name(struct da_monitor *da_mon) \ +{ \ + da_mon->curr_state = model_get_initial_state_##name(); \ + da_mon->monitoring = 1; \ +} \ + \ +/* \ + * da_monitoring_##name - returns true if the monitor is processing events \ + */ \ +static inline bool da_monitoring_##name(struct da_monitor *da_mon) \ +{ \ + return da_mon->monitoring; \ +} \ + \ +/* \ + * da_monitor_enabled_##name - checks if the monitor is enabled \ + */ \ +static inline bool da_monitor_enabled_##name(void) \ +{ \ + /* global switch */ \ + if (unlikely(!rv_monitoring_on())) \ + return 0; \ + \ + /* monitor enabled */ \ + if (unlikely(!rv_##name.enabled)) \ + return 0; \ + \ + return 1; \ +} \ + \ +/* \ + * da_monitor_handling_event_##name - checks if the monitor is ready to handle events \ + */ \ +static inline bool da_monitor_handling_event_##name(struct da_monitor *da_mon) \ +{ \ + \ + if (!da_monitor_enabled_##name()) \ + return 0; \ + \ + /* monitor is actually monitoring */ \ + if (unlikely(!da_monitoring_##name(da_mon))) \ + return 0; \ + \ + return 1; \ +} + +/* + * Event handler for implicit monitors. Implicit monitor is the one which the + * handler does not need to specify which da_monitor to manipulate. Examples + * of implicit monitor are the per_cpu or the global ones. + */ +#define DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \ + \ +static inline bool \ +da_event_##name(struct da_monitor *da_mon, enum events_##name event) \ +{ \ + type curr_state = da_monitor_curr_state_##name(da_mon); \ + type next_state = model_get_next_state_##name(curr_state, event); \ + \ + if (next_state != INVALID_STATE) { \ + da_monitor_set_state_##name(da_mon, next_state); \ + \ + trace_event_##name(model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(next_state), \ + model_is_final_state_##name(next_state)); \ + \ + return true; \ + } \ + \ + if (rv_reacting_on_##name()) \ + cond_react_##name(format_react_msg_##name(curr_state, event)); \ + \ + trace_error_##name(model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event)); \ + \ + return false; \ +} \ + +/* + * Event handler for per_task monitors. + */ +#define DECLARE_DA_MON_MODEL_HANDLER_PER_TASK(name, type) \ + \ +static inline bool da_event_##name(struct da_monitor *da_mon, struct task_struct *tsk, \ + enum events_##name event) \ +{ \ + type curr_state = da_monitor_curr_state_##name(da_mon); \ + type next_state = model_get_next_state_##name(curr_state, event); \ + \ + if (next_state != INVALID_STATE) { \ + da_monitor_set_state_##name(da_mon, next_state); \ + \ + trace_event_##name(tsk->pid, \ + model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(next_state), \ + model_is_final_state_##name(next_state)); \ + \ + return true; \ + } \ + \ + if (rv_reacting_on_##name()) \ + cond_react_##name(format_react_msg_##name(curr_state, event)); \ + \ + trace_error_##name(tsk->pid, \ + model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event)); \ + \ + return false; \ +} + +/* + * Functions to define, init and get a global monitor. + */ +#define DECLARE_DA_MON_INIT_GLOBAL(name, type) \ + \ +/* \ + * global monitor (a single variable) \ + */ \ +static struct da_monitor da_mon_##name; \ + \ +/* \ + * da_get_monitor_##name - return the global monitor address \ + */ \ +static struct da_monitor *da_get_monitor_##name(void) \ +{ \ + return &da_mon_##name; \ +} \ + \ +/* \ + * da_monitor_reset_all_##name - reset the single monitor \ + */ \ +static void da_monitor_reset_all_##name(void) \ +{ \ + da_monitor_reset_##name(da_get_monitor_##name()); \ +} \ + \ +/* \ + * da_monitor_init_##name - initialize a monitor \ + */ \ +static inline int da_monitor_init_##name(void) \ +{ \ + da_monitor_reset_all_##name(); \ + return 0; \ +} \ + \ +/* \ + * da_monitor_destroy_##name - destroy the monitor \ + */ \ +static inline void da_monitor_destroy_##name(void) \ +{ \ + return; \ +} + +/* + * Functions to define, init and get a per-cpu monitor. + */ +#define DECLARE_DA_MON_INIT_PER_CPU(name, type) \ + \ +/* \ + * per-cpu monitor variables \ + */ \ +DEFINE_PER_CPU(struct da_monitor, da_mon_##name); \ + \ +/* \ + * da_get_monitor_##name - return current CPU monitor address \ + */ \ +static struct da_monitor *da_get_monitor_##name(void) \ +{ \ + return this_cpu_ptr(&da_mon_##name); \ +} \ + \ +/* \ + * da_monitor_reset_all_##name - reset all CPUs' monitor \ + */ \ +static void da_monitor_reset_all_##name(void) \ +{ \ + struct da_monitor *da_mon; \ + int cpu; \ + for_each_cpu(cpu, cpu_online_mask) { \ + da_mon = per_cpu_ptr(&da_mon_##name, cpu); \ + da_monitor_reset_##name(da_mon); \ + } \ +} \ + \ +/* \ + * da_monitor_init_##name - initialize all CPUs' monitor \ + */ \ +static inline int da_monitor_init_##name(void) \ +{ \ + da_monitor_reset_all_##name(); \ + return 0; \ +} \ + \ +/* \ + * da_monitor_destroy_##name - destroy the monitor \ + */ \ +static inline void da_monitor_destroy_##name(void) \ +{ \ + return; \ +} + +/* + * Functions to define, init and get a per-task monitor. + */ +#define DECLARE_DA_MON_INIT_PER_TASK(name, type) \ + \ +/* \ + * The per-task monitor is stored a vector in the task struct. This variable \ + * stores the position on the vector reserved for this monitor. \ + */ \ +static int task_mon_slot_##name = RV_PER_TASK_MONITOR_INIT; \ + \ +/* \ + * da_get_monitor_##name - return the monitor in the allocated slot for tsk \ + */ \ +static inline struct da_monitor *da_get_monitor_##name(struct task_struct *tsk) \ +{ \ + return &tsk->rv[task_mon_slot_##name].da_mon; \ +} \ + \ +static void da_monitor_reset_all_##name(void) \ +{ \ + struct task_struct *g, *p; \ + \ + read_lock(&tasklist_lock); \ + for_each_process_thread(g, p) \ + da_monitor_reset_##name(da_get_monitor_##name(p)); \ + read_unlock(&tasklist_lock); \ +} \ + \ +/* \ + * da_monitor_init_##name - initialize the per-task monitor \ + * \ + * Try to allocate a slot in the task's vector of monitors. If there \ + * is an available slot, use it and reset all task's monitor. \ + */ \ +static int da_monitor_init_##name(void) \ +{ \ + int slot; \ + \ + slot = rv_get_task_monitor_slot(); \ + if (slot < 0 || slot >= RV_PER_TASK_MONITOR_INIT) \ + return slot; \ + \ + task_mon_slot_##name = slot; \ + \ + da_monitor_reset_all_##name(); \ + return 0; \ +} \ + \ +/* \ + * da_monitor_destroy_##name - return the allocated slot \ + */ \ +static inline void da_monitor_destroy_##name(void) \ +{ \ + if (task_mon_slot_##name == RV_PER_TASK_MONITOR_INIT) { \ + WARN_ONCE(1, "Disabling a disabled monitor: " #name); \ + return; \ + } \ + rv_put_task_monitor_slot(task_mon_slot_##name); \ + task_mon_slot_##name = RV_PER_TASK_MONITOR_INIT; \ + return; \ +} + +/* + * Handle event for implicit monitor: da_get_monitor_##name() will figure out + * the monitor. + */ +#define DECLARE_DA_MON_MONITOR_HANDLER_IMPLICIT(name, type) \ + \ +static inline void __da_handle_event_##name(struct da_monitor *da_mon, \ + enum events_##name event) \ +{ \ + bool retval; \ + \ + retval = da_event_##name(da_mon, event); \ + if (!retval) \ + da_monitor_reset_##name(da_mon); \ +} \ + \ +/* \ + * da_handle_event_##name - handle an event \ + */ \ +static inline void da_handle_event_##name(enum events_##name event) \ +{ \ + struct da_monitor *da_mon = da_get_monitor_##name(); \ + bool retval; \ + \ + retval = da_monitor_handling_event_##name(da_mon); \ + if (!retval) \ + return; \ + \ + __da_handle_event_##name(da_mon, event); \ +} \ + \ +/* \ + * da_handle_start_event_##name - start monitoring or handle event \ + * \ + * This function is used to notify the monitor that the system is returning \ + * to the initial state, so the monitor can start monitoring in the next event. \ + * Thus: \ + * \ + * If the monitor already started, handle the event. \ + * If the monitor did not start yet, start the monitor but skip the event. \ + */ \ +static inline bool da_handle_start_event_##name(enum events_##name event) \ +{ \ + struct da_monitor *da_mon; \ + \ + if (!da_monitor_enabled_##name()) \ + return 0; \ + \ + da_mon = da_get_monitor_##name(); \ + \ + if (unlikely(!da_monitoring_##name(da_mon))) { \ + da_monitor_start_##name(da_mon); \ + return 0; \ + } \ + \ + __da_handle_event_##name(da_mon, event); \ + \ + return 1; \ +} \ + \ +/* \ + * da_handle_start_run_event_##name - start monitoring and handle event \ + * \ + * This function is used to notify the monitor that the system is in the \ + * initial state, so the monitor can start monitoring and handling event. \ + */ \ +static inline bool da_handle_start_run_event_##name(enum events_##name event) \ +{ \ + struct da_monitor *da_mon; \ + \ + if (!da_monitor_enabled_##name()) \ + return 0; \ + \ + da_mon = da_get_monitor_##name(); \ + \ + if (unlikely(!da_monitoring_##name(da_mon))) \ + da_monitor_start_##name(da_mon); \ + \ + __da_handle_event_##name(da_mon, event); \ + \ + return 1; \ +} + +/* + * Handle event for per task. + */ +#define DECLARE_DA_MON_MONITOR_HANDLER_PER_TASK(name, type) \ + \ +static inline void \ +__da_handle_event_##name(struct da_monitor *da_mon, struct task_struct *tsk, \ + enum events_##name event) \ +{ \ + bool retval; \ + \ + retval = da_event_##name(da_mon, tsk, event); \ + if (!retval) \ + da_monitor_reset_##name(da_mon); \ +} \ + \ +/* \ + * da_handle_event_##name - handle an event \ + */ \ +static inline void \ +da_handle_event_##name(struct task_struct *tsk, enum events_##name event) \ +{ \ + struct da_monitor *da_mon = da_get_monitor_##name(tsk); \ + bool retval; \ + \ + retval = da_monitor_handling_event_##name(da_mon); \ + if (!retval) \ + return; \ + \ + __da_handle_event_##name(da_mon, tsk, event); \ +} \ + \ +/* \ + * da_handle_start_event_##name - start monitoring or handle event \ + * \ + * This function is used to notify the monitor that the system is returning \ + * to the initial state, so the monitor can start monitoring in the next event. \ + * Thus: \ + * \ + * If the monitor already started, handle the event. \ + * If the monitor did not start yet, start the monitor but skip the event. \ + */ \ +static inline bool \ +da_handle_start_event_##name(struct task_struct *tsk, enum events_##name event) \ +{ \ + struct da_monitor *da_mon; \ + \ + if (!da_monitor_enabled_##name()) \ + return 0; \ + \ + da_mon = da_get_monitor_##name(tsk); \ + \ + if (unlikely(!da_monitoring_##name(da_mon))) { \ + da_monitor_start_##name(da_mon); \ + return 0; \ + } \ + \ + __da_handle_event_##name(da_mon, tsk, event); \ + \ + return 1; \ +} + +/* + * Entry point for the global monitor. + */ +#define DECLARE_DA_MON_GLOBAL(name, type) \ + \ +DECLARE_AUTOMATA_HELPERS(name, type) \ +DECLARE_DA_MON_GENERIC_HELPERS(name, type) \ +DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \ +DECLARE_DA_MON_INIT_GLOBAL(name, type) \ +DECLARE_DA_MON_MONITOR_HANDLER_IMPLICIT(name, type) + +/* + * Entry point for the per-cpu monitor. + */ +#define DECLARE_DA_MON_PER_CPU(name, type) \ + \ +DECLARE_AUTOMATA_HELPERS(name, type) \ +DECLARE_DA_MON_GENERIC_HELPERS(name, type) \ +DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \ +DECLARE_DA_MON_INIT_PER_CPU(name, type) \ +DECLARE_DA_MON_MONITOR_HANDLER_IMPLICIT(name, type) + +/* + * Entry point for the per-task monitor. + */ +#define DECLARE_DA_MON_PER_TASK(name, type) \ + \ +DECLARE_AUTOMATA_HELPERS(name, type) \ +DECLARE_DA_MON_GENERIC_HELPERS(name, type) \ +DECLARE_DA_MON_MODEL_HANDLER_PER_TASK(name, type) \ +DECLARE_DA_MON_INIT_PER_TASK(name, type) \ +DECLARE_DA_MON_MONITOR_HANDLER_PER_TASK(name, type) diff --git a/include/rv/instrumentation.h b/include/rv/instrumentation.h new file mode 100644 index 000000000000..d4e7a02ede1a --- /dev/null +++ b/include/rv/instrumentation.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> + * + * Helper functions to facilitate the instrumentation of auto-generated + * RV monitors create by dot2k. + * + * The dot2k tool is available at tools/verification/dot2/ + */ + +#include <linux/ftrace.h> + +/* + * rv_attach_trace_probe - check and attach a handler function to a tracepoint + */ +#define rv_attach_trace_probe(monitor, tp, rv_handler) \ + do { \ + check_trace_callback_type_##tp(rv_handler); \ + WARN_ONCE(register_trace_##tp(rv_handler, NULL), \ + "fail attaching " #monitor " " #tp "handler"); \ + } while (0) + +/* + * rv_detach_trace_probe - detach a handler function to a tracepoint + */ +#define rv_detach_trace_probe(monitor, tp, rv_handler) \ + do { \ + unregister_trace_##tp(rv_handler, NULL); \ + } while (0) diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 2814f188d98c..24969184c534 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -186,7 +186,7 @@ TRACE_EVENT(devlink_trap_report, __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(trap_name, metadata->trap_name) __string(trap_group_name, metadata->trap_group_name) - __dynamic_array(char, input_dev_name, IFNAMSIZ) + __array(char, input_dev_name, IFNAMSIZ) ), TP_fast_assign( @@ -197,15 +197,14 @@ TRACE_EVENT(devlink_trap_report, __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); __assign_str(trap_name, metadata->trap_name); __assign_str(trap_group_name, metadata->trap_group_name); - __assign_str(input_dev_name, - (input_dev ? input_dev->name : "NULL")); + strscpy(__entry->input_dev_name, input_dev ? input_dev->name : "NULL", IFNAMSIZ); ), TP_printk("bus_name=%s dev_name=%s driver_name=%s trap_name=%s " "trap_group_name=%s input_dev_name=%s", __get_str(bus_name), __get_str(dev_name), __get_str(driver_name), __get_str(trap_name), __get_str(trap_group_name), - __get_str(input_dev_name)) + __entry->input_dev_name) ); #endif /* _TRACE_DEVLINK_H */ diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 6f2a4dc35e37..c2300c407f58 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -32,7 +32,7 @@ TRACE_EVENT(fib_table_lookup, __array( __u8, gw6, 16 ) __field( u16, sport ) __field( u16, dport ) - __dynamic_array(char, name, IFNAMSIZ ) + __array(char, name, IFNAMSIZ ) ), TP_fast_assign( @@ -66,7 +66,7 @@ TRACE_EVENT(fib_table_lookup, } dev = nhc ? nhc->nhc_dev : NULL; - __assign_str(name, dev ? dev->name : "-"); + strlcpy(__entry->name, dev ? dev->name : "-", IFNAMSIZ); if (nhc) { if (nhc->nhc_gw_family == AF_INET) { @@ -95,7 +95,7 @@ TRACE_EVENT(fib_table_lookup, __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, __entry->tos, __entry->scope, __entry->flags, - __get_str(name), __entry->gw4, __entry->gw6, __entry->err) + __entry->name, __entry->gw4, __entry->gw6, __entry->err) ); #endif /* _TRACE_FIB_H */ diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index c6abdcc77c12..6e821eb79450 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -31,7 +31,7 @@ TRACE_EVENT(fib6_table_lookup, __field( u16, dport ) __field( u8, proto ) __field( u8, rt_type ) - __dynamic_array( char, name, IFNAMSIZ ) + __array( char, name, IFNAMSIZ ) __array( __u8, gw, 16 ) ), @@ -63,9 +63,9 @@ TRACE_EVENT(fib6_table_lookup, } if (res->nh && res->nh->fib_nh_dev) { - __assign_str(name, res->nh->fib_nh_dev); + strlcpy(__entry->name, res->nh->fib_nh_dev->name, IFNAMSIZ); } else { - __assign_str(name, "-"); + strcpy(__entry->name, "-"); } if (res->f6i == net->ipv6.fib6_null_entry) { struct in6_addr in6_zero = {}; @@ -83,7 +83,7 @@ TRACE_EVENT(fib6_table_lookup, __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, __entry->tos, __entry->scope, __entry->flags, - __get_str(name), __entry->gw, __entry->err) + __entry->name, __entry->gw, __entry->err) ); #endif /* _TRACE_FIB6_H */ diff --git a/include/trace/events/iscsi.h b/include/trace/events/iscsi.h index 87408faf6e4e..8ff2a3ca5d75 100644 --- a/include/trace/events/iscsi.h +++ b/include/trace/events/iscsi.h @@ -26,12 +26,12 @@ DECLARE_EVENT_CLASS(iscsi_log_msg, TP_STRUCT__entry( __string(dname, dev_name(dev) ) - __dynamic_array(char, msg, ISCSI_MSG_MAX ) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(dname, dev_name(dev)); - vsnprintf(__get_str(msg), ISCSI_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s",__get_str(dname), __get_str(msg) diff --git a/include/trace/events/neigh.h b/include/trace/events/neigh.h index 62bb17516713..5eaa1fa99171 100644 --- a/include/trace/events/neigh.h +++ b/include/trace/events/neigh.h @@ -30,7 +30,7 @@ TRACE_EVENT(neigh_create, TP_STRUCT__entry( __field(u32, family) - __dynamic_array(char, dev, IFNAMSIZ ) + __string(dev, dev ? dev->name : "NULL") __field(int, entries) __field(u8, created) __field(u8, gc_exempt) diff --git a/include/trace/events/qla.h b/include/trace/events/qla.h index 5857cf682ee7..e7fd55e7dc3d 100644 --- a/include/trace/events/qla.h +++ b/include/trace/events/qla.h @@ -22,11 +22,11 @@ DECLARE_EVENT_CLASS(qla_log_event, TP_STRUCT__entry( __string(buf, buf) - __dynamic_array(char, msg, QLA_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(buf, buf); - vsnprintf(__get_str(msg), QLA_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s %s", __get_str(buf), __get_str(msg)) diff --git a/include/trace/events/rv.h b/include/trace/events/rv.h new file mode 100644 index 000000000000..56592da9301c --- /dev/null +++ b/include/trace/events/rv.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rv + +#if !defined(_TRACE_RV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RV_H + +#include <linux/rv.h> +#include <linux/tracepoint.h> + +#ifdef CONFIG_DA_MON_EVENTS_IMPLICIT +DECLARE_EVENT_CLASS(event_da_monitor, + + TP_PROTO(char *state, char *event, char *next_state, bool final_state), + + TP_ARGS(state, event, next_state, final_state), + + TP_STRUCT__entry( + __array( char, state, MAX_DA_NAME_LEN ) + __array( char, event, MAX_DA_NAME_LEN ) + __array( char, next_state, MAX_DA_NAME_LEN ) + __field( bool, final_state ) + ), + + TP_fast_assign( + memcpy(__entry->state, state, MAX_DA_NAME_LEN); + memcpy(__entry->event, event, MAX_DA_NAME_LEN); + memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN); + __entry->final_state = final_state; + ), + + TP_printk("%s x %s -> %s %s", + __entry->state, + __entry->event, + __entry->next_state, + __entry->final_state ? "(final)" : "") +); + +DECLARE_EVENT_CLASS(error_da_monitor, + + TP_PROTO(char *state, char *event), + + TP_ARGS(state, event), + + TP_STRUCT__entry( + __array( char, state, MAX_DA_NAME_LEN ) + __array( char, event, MAX_DA_NAME_LEN ) + ), + + TP_fast_assign( + memcpy(__entry->state, state, MAX_DA_NAME_LEN); + memcpy(__entry->event, event, MAX_DA_NAME_LEN); + ), + + TP_printk("event %s not expected in the state %s", + __entry->event, + __entry->state) +); + +#ifdef CONFIG_RV_MON_WIP +DEFINE_EVENT(event_da_monitor, event_wip, + TP_PROTO(char *state, char *event, char *next_state, bool final_state), + TP_ARGS(state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor, error_wip, + TP_PROTO(char *state, char *event), + TP_ARGS(state, event)); +#endif /* CONFIG_RV_MON_WIP */ +#endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */ + +#ifdef CONFIG_DA_MON_EVENTS_ID +DECLARE_EVENT_CLASS(event_da_monitor_id, + + TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state), + + TP_ARGS(id, state, event, next_state, final_state), + + TP_STRUCT__entry( + __field( int, id ) + __array( char, state, MAX_DA_NAME_LEN ) + __array( char, event, MAX_DA_NAME_LEN ) + __array( char, next_state, MAX_DA_NAME_LEN ) + __field( bool, final_state ) + ), + + TP_fast_assign( + memcpy(__entry->state, state, MAX_DA_NAME_LEN); + memcpy(__entry->event, event, MAX_DA_NAME_LEN); + memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN); + __entry->id = id; + __entry->final_state = final_state; + ), + + TP_printk("%d: %s x %s -> %s %s", + __entry->id, + __entry->state, + __entry->event, + __entry->next_state, + __entry->final_state ? "(final)" : "") +); + +DECLARE_EVENT_CLASS(error_da_monitor_id, + + TP_PROTO(int id, char *state, char *event), + + TP_ARGS(id, state, event), + + TP_STRUCT__entry( + __field( int, id ) + __array( char, state, MAX_DA_NAME_LEN ) + __array( char, event, MAX_DA_NAME_LEN ) + ), + + TP_fast_assign( + memcpy(__entry->state, state, MAX_DA_NAME_LEN); + memcpy(__entry->event, event, MAX_DA_NAME_LEN); + __entry->id = id; + ), + + TP_printk("%d: event %s not expected in the state %s", + __entry->id, + __entry->event, + __entry->state) +); + +#ifdef CONFIG_RV_MON_WWNR +/* id is the pid of the task */ +DEFINE_EVENT(event_da_monitor_id, event_wwnr, + TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state), + TP_ARGS(id, state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor_id, error_wwnr, + TP_PROTO(int id, char *state, char *event), + TP_ARGS(id, state, event)); +#endif /* CONFIG_RV_MON_WWNR */ + +#endif /* CONFIG_DA_MON_EVENTS_ID */ +#endif /* _TRACE_RV_H */ + +/* This part ust be outside protection */ +#undef TRACE_INCLUDE_PATH +#include <trace/define_trace.h> diff --git a/include/trace/stages/stage1_struct_define.h b/include/trace/stages/stage1_struct_define.h index a16783419687..1b7bab60434c 100644 --- a/include/trace/stages/stage1_struct_define.h +++ b/include/trace/stages/stage1_struct_define.h @@ -26,6 +26,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h index 42fd1e8813ec..1b7a8f764fdd 100644 --- a/include/trace/stages/stage2_data_offsets.h +++ b/include/trace/stages/stage2_data_offsets.h @@ -32,6 +32,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index e80cdc397a43..a8fb25f39a99 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -2,16 +2,18 @@ /* Stage 4 definitions for creating trace events */ +#define ALIGN_STRUCTFIELD(type) ((int)(__alignof__(struct {type b;}))) + #undef __field_ext #define __field_ext(_type, _item, _filter_type) { \ .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ + .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ .is_signed = is_signed_type(_type), .filter_type = _filter_type }, #undef __field_struct_ext #define __field_struct_ext(_type, _item, _filter_type) { \ .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ + .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ 0, .filter_type = _filter_type }, #undef __field @@ -23,7 +25,7 @@ #undef __array #define __array(_type, _item, _len) { \ .type = #_type"["__stringify(_len)"]", .name = #_item, \ - .size = sizeof(_type[_len]), .align = __alignof__(_type), \ + .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, #undef __dynamic_array @@ -38,6 +40,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index 7ee5931300e6..fba4c24ed9e6 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -39,6 +39,10 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, \ + __trace_event_vstr_len(fmt, ap)) + #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) \ __item_length = (len) * sizeof(type); \ diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index e1724f73594b..3c554a585320 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -24,6 +24,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __assign_str #define __assign_str(dst, src) \ strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)"); @@ -35,6 +38,15 @@ __get_str(dst)[len] = '\0'; \ } while(0) +#undef __assign_vstr +#define __assign_vstr(dst, fmt, va) \ + do { \ + va_list __cp_va; \ + va_copy(__cp_va, *(va)); \ + vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, __cp_va); \ + va_end(__cp_va); \ + } while (0) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/kernel/fork.c b/kernel/fork.c index 28772142022a..90c85b17bf69 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1965,6 +1965,18 @@ static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk) mutex_unlock(&oom_adj_mutex); } +#ifdef CONFIG_RV +static void rv_task_fork(struct task_struct *p) +{ + int i; + + for (i = 0; i < RV_PER_TASK_MONITORS; i++) + p->rv[i].da_mon.monitoring = false; +} +#else +#define rv_task_fork(p) do {} while (0) +#endif + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -2403,6 +2415,8 @@ static __latent_entropy struct task_struct *copy_process( */ copy_seccomp(p); + rv_task_fork(p); + rseq_fork(p, clone_flags); /* Don't start children in a dying pid namespace */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ccd6a5ade3e9..1052126bdca2 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -1106,4 +1106,6 @@ config HIST_TRIGGERS_DEBUG If unsure, say N. +source "kernel/trace/rv/Kconfig" + endif # FTRACE diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 0d261774d6f3..c6651e16b557 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -106,5 +106,6 @@ obj-$(CONFIG_FPROBE) += fprobe.o obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o +obj-$(CONFIG_RV) += rv/ libftrace-y := ftrace.o diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig new file mode 100644 index 000000000000..831779607e84 --- /dev/null +++ b/kernel/trace/rv/Kconfig @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +config DA_MON_EVENTS + bool + +config DA_MON_EVENTS_IMPLICIT + select DA_MON_EVENTS + bool + +config DA_MON_EVENTS_ID + select DA_MON_EVENTS + bool + +menuconfig RV + bool "Runtime Verification" + depends on TRACING + help + Enable the kernel runtime verification infrastructure. RV is a + lightweight (yet rigorous) method that complements classical + exhaustive verification techniques (such as model checking and + theorem proving). RV works by analyzing the trace of the system's + actual execution, comparing it against a formal specification of + the system behavior. + + For further information, see: + Documentation/trace/rv/runtime-verification.rst + +config RV_MON_WIP + depends on RV + depends on PREEMPT_TRACER + select DA_MON_EVENTS_IMPLICIT + bool "wip monitor" + help + Enable wip (wakeup in preemptive) sample monitor that illustrates + the usage of per-cpu monitors, and one limitation of the + preempt_disable/enable events. + + For further information, see: + Documentation/trace/rv/monitor_wip.rst + +config RV_MON_WWNR + depends on RV + select DA_MON_EVENTS_ID + bool "wwnr monitor" + help + Enable wwnr (wakeup while not running) sample monitor, this is a + sample monitor that illustrates the usage of per-task monitor. + The model is borken on purpose: it serves to test reactors. + + For further information, see: + Documentation/trace/rv/monitor_wwnr.rst + +config RV_REACTORS + bool "Runtime verification reactors" + default y + depends on RV + help + Enables the online runtime verification reactors. A runtime + monitor can cause a reaction to the detection of an exception + on the model's execution. By default, the monitors have + tracing reactions, printing the monitor output via tracepoints, + but other reactions can be added (on-demand) via this interface. + +config RV_REACT_PRINTK + bool "Printk reactor" + depends on RV_REACTORS + default y + help + Enables the printk reactor. The printk reactor emits a printk() + message if an exception is found. + +config RV_REACT_PANIC + bool "Panic reactor" + depends on RV_REACTORS + default y + help + Enables the panic reactor. The panic reactor emits a printk() + message if an exception is found and panic()s the system. diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile new file mode 100644 index 000000000000..963d14875b45 --- /dev/null +++ b/kernel/trace/rv/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_RV) += rv.o +obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o +obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o +obj-$(CONFIG_RV_REACTORS) += rv_reactors.o +obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o +obj-$(CONFIG_RV_REACT_PANIC) += reactor_panic.o diff --git a/kernel/trace/rv/monitors/wip/wip.c b/kernel/trace/rv/monitors/wip/wip.c new file mode 100644 index 000000000000..83cace53b9fa --- /dev/null +++ b/kernel/trace/rv/monitors/wip/wip.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "wip" + +#include <trace/events/rv.h> +#include <trace/events/sched.h> +#include <trace/events/preemptirq.h> + +#include "wip.h" + +struct rv_monitor rv_wip; +DECLARE_DA_MON_PER_CPU(wip, unsigned char); + +static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip) +{ + da_handle_event_wip(preempt_disable_wip); +} + +static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip) +{ + da_handle_start_event_wip(preempt_enable_wip); +} + +static void handle_sched_waking(void *data, struct task_struct *task) +{ + da_handle_event_wip(sched_waking_wip); +} + +static int enable_wip(void) +{ + int retval; + + retval = da_monitor_init_wip(); + if (retval) + return retval; + + rv_attach_trace_probe("wip", preempt_enable, handle_preempt_enable); + rv_attach_trace_probe("wip", sched_waking, handle_sched_waking); + rv_attach_trace_probe("wip", preempt_disable, handle_preempt_disable); + + return 0; +} + +static void disable_wip(void) +{ + rv_wip.enabled = 0; + + rv_detach_trace_probe("wip", preempt_disable, handle_preempt_disable); + rv_detach_trace_probe("wip", preempt_enable, handle_preempt_enable); + rv_detach_trace_probe("wip", sched_waking, handle_sched_waking); + + da_monitor_destroy_wip(); +} + +struct rv_monitor rv_wip = { + .name = "wip", + .description = "wakeup in preemptive per-cpu testing monitor.", + .enable = enable_wip, + .disable = disable_wip, + .reset = da_monitor_reset_all_wip, + .enabled = 0, +}; + +static int register_wip(void) +{ + rv_register_monitor(&rv_wip); + return 0; +} + +static void unregister_wip(void) +{ + rv_unregister_monitor(&rv_wip); +} + +module_init(register_wip); +module_exit(unregister_wip); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Bristot de Oliveira <bristot@kernel.org>"); +MODULE_DESCRIPTION("wip: wakeup in preemptive - per-cpu sample monitor."); diff --git a/kernel/trace/rv/monitors/wip/wip.h b/kernel/trace/rv/monitors/wip/wip.h new file mode 100644 index 000000000000..c1c47e2305ef --- /dev/null +++ b/kernel/trace/rv/monitors/wip/wip.h @@ -0,0 +1,46 @@ +/* + * Automatically generated C representation of wip automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_wip { + preemptive_wip = 0, + non_preemptive_wip, + state_max_wip +}; + +#define INVALID_STATE state_max_wip + +enum events_wip { + preempt_disable_wip = 0, + preempt_enable_wip, + sched_waking_wip, + event_max_wip +}; + +struct automaton_wip { + char *state_names[state_max_wip]; + char *event_names[event_max_wip]; + unsigned char function[state_max_wip][event_max_wip]; + unsigned char initial_state; + bool final_states[state_max_wip]; +}; + +struct automaton_wip automaton_wip = { + .state_names = { + "preemptive", + "non_preemptive" + }, + .event_names = { + "preempt_disable", + "preempt_enable", + "sched_waking" + }, + .function = { + { non_preemptive_wip, INVALID_STATE, INVALID_STATE }, + { INVALID_STATE, preemptive_wip, non_preemptive_wip }, + }, + .initial_state = preemptive_wip, + .final_states = { 1, 0 }, +}; diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.c b/kernel/trace/rv/monitors/wwnr/wwnr.c new file mode 100644 index 000000000000..599225d9cf38 --- /dev/null +++ b/kernel/trace/rv/monitors/wwnr/wwnr.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "wwnr" + +#include <trace/events/rv.h> +#include <trace/events/sched.h> + +#include "wwnr.h" + +struct rv_monitor rv_wwnr; +DECLARE_DA_MON_PER_TASK(wwnr, unsigned char); + +static void handle_switch(void *data, bool preempt, struct task_struct *p, + struct task_struct *n, unsigned int prev_state) +{ + /* start monitoring only after the first suspension */ + if (prev_state == TASK_INTERRUPTIBLE) + da_handle_start_event_wwnr(p, switch_out_wwnr); + else + da_handle_event_wwnr(p, switch_out_wwnr); + + da_handle_event_wwnr(n, switch_in_wwnr); +} + +static void handle_wakeup(void *data, struct task_struct *p) +{ + da_handle_event_wwnr(p, wakeup_wwnr); +} + +static int enable_wwnr(void) +{ + int retval; + + retval = da_monitor_init_wwnr(); + if (retval) + return retval; + + rv_attach_trace_probe("wwnr", sched_switch, handle_switch); + rv_attach_trace_probe("wwnr", sched_wakeup, handle_wakeup); + + return 0; +} + +static void disable_wwnr(void) +{ + rv_wwnr.enabled = 0; + + rv_detach_trace_probe("wwnr", sched_switch, handle_switch); + rv_detach_trace_probe("wwnr", sched_wakeup, handle_wakeup); + + da_monitor_destroy_wwnr(); +} + +struct rv_monitor rv_wwnr = { + .name = "wwnr", + .description = "wakeup while not running per-task testing model.", + .enable = enable_wwnr, + .disable = disable_wwnr, + .reset = da_monitor_reset_all_wwnr, + .enabled = 0, +}; + +static int register_wwnr(void) +{ + rv_register_monitor(&rv_wwnr); + return 0; +} + +static void unregister_wwnr(void) +{ + rv_unregister_monitor(&rv_wwnr); +} + +module_init(register_wwnr); +module_exit(unregister_wwnr); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Bristot de Oliveira <bristot@kernel.org>"); +MODULE_DESCRIPTION("wwnr: wakeup while not running monitor"); diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.h b/kernel/trace/rv/monitors/wwnr/wwnr.h new file mode 100644 index 000000000000..d1afe55cdd4c --- /dev/null +++ b/kernel/trace/rv/monitors/wwnr/wwnr.h @@ -0,0 +1,46 @@ +/* + * Automatically generated C representation of wwnr automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_wwnr { + not_running_wwnr = 0, + running_wwnr, + state_max_wwnr +}; + +#define INVALID_STATE state_max_wwnr + +enum events_wwnr { + switch_in_wwnr = 0, + switch_out_wwnr, + wakeup_wwnr, + event_max_wwnr +}; + +struct automaton_wwnr { + char *state_names[state_max_wwnr]; + char *event_names[event_max_wwnr]; + unsigned char function[state_max_wwnr][event_max_wwnr]; + unsigned char initial_state; + bool final_states[state_max_wwnr]; +}; + +struct automaton_wwnr automaton_wwnr = { + .state_names = { + "not_running", + "running" + }, + .event_names = { + "switch_in", + "switch_out", + "wakeup" + }, + .function = { + { running_wwnr, INVALID_STATE, not_running_wwnr }, + { INVALID_STATE, not_running_wwnr, INVALID_STATE }, + }, + .initial_state = not_running_wwnr, + .final_states = { 1, 0 }, +}; diff --git a/kernel/trace/rv/reactor_panic.c b/kernel/trace/rv/reactor_panic.c new file mode 100644 index 000000000000..b698d05dd069 --- /dev/null +++ b/kernel/trace/rv/reactor_panic.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> + * + * Panic RV reactor: + * Prints the exception msg to the kernel message log and panic(). + */ + +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> + +static void rv_panic_reaction(char *msg) +{ + panic(msg); +} + +static struct rv_reactor rv_panic = { + .name = "panic", + .description = "panic the system if an exception is found.", + .react = rv_panic_reaction +}; + +static int register_react_panic(void) +{ + rv_register_reactor(&rv_panic); + return 0; +} + +static void unregister_react_panic(void) +{ + rv_unregister_reactor(&rv_panic); +} + +module_init(register_react_panic); +module_exit(unregister_react_panic); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Bristot de Oliveira"); +MODULE_DESCRIPTION("panic rv reactor: panic if an exception is found."); diff --git a/kernel/trace/rv/reactor_printk.c b/kernel/trace/rv/reactor_printk.c new file mode 100644 index 000000000000..31899f953af4 --- /dev/null +++ b/kernel/trace/rv/reactor_printk.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> + * + * Printk RV reactor: + * Prints the exception msg to the kernel message log. + */ +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> + +static void rv_printk_reaction(char *msg) +{ + printk_deferred(msg); +} + +static struct rv_reactor rv_printk = { + .name = "printk", + .description = "prints the exception msg to the kernel message log.", + .react = rv_printk_reaction +}; + +static int register_react_printk(void) +{ + rv_register_reactor(&rv_printk); + return 0; +} + +static void unregister_react_printk(void) +{ + rv_unregister_reactor(&rv_printk); +} + +module_init(register_react_printk); +module_exit(unregister_react_printk); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Bristot de Oliveira"); +MODULE_DESCRIPTION("printk rv reactor: printk if an exception is hit."); diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c new file mode 100644 index 000000000000..6c97cc2d754a --- /dev/null +++ b/kernel/trace/rv/rv.c @@ -0,0 +1,799 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> + * + * This is the online Runtime Verification (RV) interface. + * + * RV is a lightweight (yet rigorous) method that complements classical + * exhaustive verification techniques (such as model checking and + * theorem proving) with a more practical approach to complex systems. + * + * RV works by analyzing the trace of the system's actual execution, + * comparing it against a formal specification of the system behavior. + * RV can give precise information on the runtime behavior of the + * monitored system while enabling the reaction for unexpected + * events, avoiding, for example, the propagation of a failure on + * safety-critical systems. + * + * The development of this interface roots in the development of the + * paper: + * + * De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo + * Silva. Efficient formal verification for the Linux kernel. In: + * International Conference on Software Engineering and Formal Methods. + * Springer, Cham, 2019. p. 315-332. + * + * And: + * + * De Oliveira, Daniel Bristot, et al. Automata-based formal analysis + * and verification of the real-time Linux kernel. PhD Thesis, 2020. + * + * == Runtime monitor interface == + * + * A monitor is the central part of the runtime verification of a system. + * + * The monitor stands in between the formal specification of the desired + * (or undesired) behavior, and the trace of the actual system. + * + * In Linux terms, the runtime verification monitors are encapsulated + * inside the "RV monitor" abstraction. A RV monitor includes a reference + * model of the system, a set of instances of the monitor (per-cpu monitor, + * per-task monitor, and so on), and the helper functions that glue the + * monitor to the system via trace. Generally, a monitor includes some form + * of trace output as a reaction for event parsing and exceptions, + * as depicted bellow: + * + * Linux +----- RV Monitor ----------------------------------+ Formal + * Realm | | Realm + * +-------------------+ +----------------+ +-----------------+ + * | Linux kernel | | Monitor | | Reference | + * | Tracing | -> | Instance(s) | <- | Model | + * | (instrumentation) | | (verification) | | (specification) | + * +-------------------+ +----------------+ +-----------------+ + * | | | + * | V | + * | +----------+ | + * | | Reaction | | + * | +--+--+--+-+ | + * | | | | | + * | | | +-> trace output ? | + * +------------------------|--|----------------------+ + * | +----> panic ? + * +-------> <user-specified> + * + * This file implements the interface for loading RV monitors, and + * to control the verification session. + * + * == Registering monitors == + * + * The struct rv_monitor defines a set of callback functions to control + * a verification session. For instance, when a given monitor is enabled, + * the "enable" callback function is called to hook the instrumentation + * functions to the kernel trace events. The "disable" function is called + * when disabling the verification session. + * + * A RV monitor is registered via: + * int rv_register_monitor(struct rv_monitor *monitor); + * And unregistered via: + * int rv_unregister_monitor(struct rv_monitor *monitor); + * + * == User interface == + * + * The user interface resembles kernel tracing interface. It presents + * these files: + * + * "available_monitors" + * - List the available monitors, one per line. + * + * For example: + * # cat available_monitors + * wip + * wwnr + * + * "enabled_monitors" + * - Lists the enabled monitors, one per line; + * - Writing to it enables a given monitor; + * - Writing a monitor name with a '!' prefix disables it; + * - Truncating the file disables all enabled monitors. + * + * For example: + * # cat enabled_monitors + * # echo wip > enabled_monitors + * # echo wwnr >> enabled_monitors + * # cat enabled_monitors + * wip + * wwnr + * # echo '!wip' >> enabled_monitors + * # cat enabled_monitors + * wwnr + * # echo > enabled_monitors + * # cat enabled_monitors + * # + * + * Note that more than one monitor can be enabled concurrently. + * + * "monitoring_on" + * - It is an on/off general switcher for monitoring. Note + * that it does not disable enabled monitors or detach events, + * but stops the per-entity monitors from monitoring the events + * received from the instrumentation. It resembles the "tracing_on" + * switcher. + * + * "monitors/" + * Each monitor will have its own directory inside "monitors/". There + * the monitor specific files will be presented. + * The "monitors/" directory resembles the "events" directory on + * tracefs. + * + * For example: + * # cd monitors/wip/ + * # ls + * desc enable + * # cat desc + * auto-generated wakeup in preemptive monitor. + * # cat enable + * 0 + * + * For further information, see: + * Documentation/trace/rv/runtime-verification.rst + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> + +#ifdef CONFIG_DA_MON_EVENTS +#define CREATE_TRACE_POINTS +#include <trace/events/rv.h> +#endif + +#include "rv.h" + +DEFINE_MUTEX(rv_interface_lock); + +static struct rv_interface rv_root; + +struct dentry *get_monitors_root(void) +{ + return rv_root.monitors_dir; +} + +/* + * Interface for the monitor register. + */ +static LIST_HEAD(rv_monitors_list); + +static int task_monitor_count; +static bool task_monitor_slots[RV_PER_TASK_MONITORS]; + +int rv_get_task_monitor_slot(void) +{ + int i; + + lockdep_assert_held(&rv_interface_lock); + + if (task_monitor_count == RV_PER_TASK_MONITORS) + return -EBUSY; + + task_monitor_count++; + + for (i = 0; i < RV_PER_TASK_MONITORS; i++) { + if (task_monitor_slots[i] == false) { + task_monitor_slots[i] = true; + return i; + } + } + + WARN_ONCE(1, "RV task_monitor_count and slots are out of sync\n"); + + return -EINVAL; +} + +void rv_put_task_monitor_slot(int slot) +{ + lockdep_assert_held(&rv_interface_lock); + + if (slot < 0 || slot >= RV_PER_TASK_MONITORS) { + WARN_ONCE(1, "RV releasing an invalid slot!: %d\n", slot); + return; + } + + WARN_ONCE(!task_monitor_slots[slot], "RV releasing unused task_monitor_slots: %d\n", + slot); + + task_monitor_count--; + task_monitor_slots[slot] = false; +} + +/* + * This section collects the monitor/ files and folders. + */ +static ssize_t monitor_enable_read_data(struct file *filp, char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct rv_monitor_def *mdef = filp->private_data; + const char *buff; + + buff = mdef->monitor->enabled ? "1\n" : "0\n"; + + return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff)+1); +} + +/* + * __rv_disable_monitor - disabled an enabled monitor + */ +static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync) +{ + lockdep_assert_held(&rv_interface_lock); + + if (mdef->monitor->enabled) { + mdef->monitor->enabled = 0; + mdef->monitor->disable(); + + /* + * Wait for the execution of all events to finish. + * Otherwise, the data used by the monitor could + * be inconsistent. i.e., if the monitor is re-enabled. + */ + if (sync) + tracepoint_synchronize_unregister(); + return 1; + } + return 0; +} + +/** + * rv_disable_monitor - disable a given runtime monitor + * + * Returns 0 on success. + */ +int rv_disable_monitor(struct rv_monitor_def *mdef) +{ + __rv_disable_monitor(mdef, true); + return 0; +} + +/** + * rv_enable_monitor - enable a given runtime monitor + * + * Returns 0 on success, error otherwise. + */ +int rv_enable_monitor(struct rv_monitor_def *mdef) +{ + int retval; + + lockdep_assert_held(&rv_interface_lock); + + if (mdef->monitor->enabled) + return 0; + + retval = mdef->monitor->enable(); + + if (!retval) + mdef->monitor->enabled = 1; + + return retval; +} + +/* + * interface for enabling/disabling a monitor. + */ +static ssize_t monitor_enable_write_data(struct file *filp, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct rv_monitor_def *mdef = filp->private_data; + int retval; + bool val; + + retval = kstrtobool_from_user(user_buf, count, &val); + if (retval) + return retval; + + retval = count; + + mutex_lock(&rv_interface_lock); + + if (val) + retval = rv_enable_monitor(mdef); + else + retval = rv_disable_monitor(mdef); + + mutex_unlock(&rv_interface_lock); + + return retval ? : count; +} + +static const struct file_operations interface_enable_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = monitor_enable_write_data, + .read = monitor_enable_read_data, +}; + +/* + * Interface to read monitors description. + */ +static ssize_t monitor_desc_read_data(struct file *filp, char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct rv_monitor_def *mdef = filp->private_data; + char buff[256]; + + memset(buff, 0, sizeof(buff)); + + snprintf(buff, sizeof(buff), "%s\n", mdef->monitor->description); + + return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff) + 1); +} + +static const struct file_operations interface_desc_fops = { + .open = simple_open, + .llseek = no_llseek, + .read = monitor_desc_read_data, +}; + +/* + * During the registration of a monitor, this function creates + * the monitor dir, where the specific options of the monitor + * are exposed. + */ +static int create_monitor_dir(struct rv_monitor_def *mdef) +{ + struct dentry *root = get_monitors_root(); + const char *name = mdef->monitor->name; + struct dentry *tmp; + int retval; + + mdef->root_d = rv_create_dir(name, root); + if (!mdef->root_d) + return -ENOMEM; + + tmp = rv_create_file("enable", RV_MODE_WRITE, mdef->root_d, mdef, &interface_enable_fops); + if (!tmp) { + retval = -ENOMEM; + goto out_remove_root; + } + + tmp = rv_create_file("desc", RV_MODE_READ, mdef->root_d, mdef, &interface_desc_fops); + if (!tmp) { + retval = -ENOMEM; + goto out_remove_root; + } + + retval = reactor_populate_monitor(mdef); + if (retval) + goto out_remove_root; + + return 0; + +out_remove_root: + rv_remove(mdef->root_d); + return retval; +} + +/* + * Available/Enable monitor shared seq functions. + */ +static int monitors_show(struct seq_file *m, void *p) +{ + struct rv_monitor_def *mon_def = p; + + seq_printf(m, "%s\n", mon_def->monitor->name); + return 0; +} + +/* + * Used by the seq file operations at the end of a read + * operation. + */ +static void monitors_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&rv_interface_lock); +} + +/* + * Available monitor seq functions. + */ +static void *available_monitors_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&rv_interface_lock); + return seq_list_start(&rv_monitors_list, *pos); +} + +static void *available_monitors_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &rv_monitors_list, pos); +} + +/* + * Enable monitor seq functions. + */ +static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct rv_monitor_def *m_def = p; + + (*pos)++; + + list_for_each_entry_continue(m_def, &rv_monitors_list, list) { + if (m_def->monitor->enabled) + return m_def; + } + + return NULL; +} + +static void *enabled_monitors_start(struct seq_file *m, loff_t *pos) +{ + struct rv_monitor_def *m_def; + loff_t l; + + mutex_lock(&rv_interface_lock); + + if (list_empty(&rv_monitors_list)) + return NULL; + + m_def = list_entry(&rv_monitors_list, struct rv_monitor_def, list); + + for (l = 0; l <= *pos; ) { + m_def = enabled_monitors_next(m, m_def, &l); + if (!m_def) + break; + } + + return m_def; +} + +/* + * available/enabled monitors seq definition. + */ +static const struct seq_operations available_monitors_seq_ops = { + .start = available_monitors_start, + .next = available_monitors_next, + .stop = monitors_stop, + .show = monitors_show +}; + +static const struct seq_operations enabled_monitors_seq_ops = { + .start = enabled_monitors_start, + .next = enabled_monitors_next, + .stop = monitors_stop, + .show = monitors_show +}; + +/* + * available_monitors interface. + */ +static int available_monitors_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &available_monitors_seq_ops); +}; + +static const struct file_operations available_monitors_ops = { + .open = available_monitors_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +/* + * enabled_monitors interface. + */ +static void disable_all_monitors(void) +{ + struct rv_monitor_def *mdef; + int enabled = 0; + + mutex_lock(&rv_interface_lock); + + list_for_each_entry(mdef, &rv_monitors_list, list) + enabled += __rv_disable_monitor(mdef, false); + + if (enabled) { + /* + * Wait for the execution of all events to finish. + * Otherwise, the data used by the monitor could + * be inconsistent. i.e., if the monitor is re-enabled. + */ + tracepoint_synchronize_unregister(); + } + + mutex_unlock(&rv_interface_lock); +} + +static int enabled_monitors_open(struct inode *inode, struct file *file) +{ + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) + disable_all_monitors(); + + return seq_open(file, &enabled_monitors_seq_ops); +}; + +static ssize_t enabled_monitors_write(struct file *filp, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buff[MAX_RV_MONITOR_NAME_SIZE + 2]; + struct rv_monitor_def *mdef; + int retval = -EINVAL; + bool enable = true; + char *ptr = buff; + int len; + + if (count < 1 || count > MAX_RV_MONITOR_NAME_SIZE + 1) + return -EINVAL; + + memset(buff, 0, sizeof(buff)); + + retval = simple_write_to_buffer(buff, sizeof(buff) - 1, ppos, user_buf, count); + if (retval < 0) + return -EFAULT; + + ptr = strim(buff); + + if (ptr[0] == '!') { + enable = false; + ptr++; + } + + len = strlen(ptr); + if (!len) + return count; + + mutex_lock(&rv_interface_lock); + + retval = -EINVAL; + + list_for_each_entry(mdef, &rv_monitors_list, list) { + if (strcmp(ptr, mdef->monitor->name) != 0) + continue; + + /* + * Monitor found! + */ + if (enable) + retval = rv_enable_monitor(mdef); + else + retval = rv_disable_monitor(mdef); + + if (!retval) + retval = count; + + break; + } + + mutex_unlock(&rv_interface_lock); + return retval; +} + +static const struct file_operations enabled_monitors_ops = { + .open = enabled_monitors_open, + .read = seq_read, + .write = enabled_monitors_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* + * Monitoring on global switcher! + */ +static bool __read_mostly monitoring_on; + +/** + * rv_monitoring_on - checks if monitoring is on + * + * Returns 1 if on, 0 otherwise. + */ +bool rv_monitoring_on(void) +{ + /* Ensures that concurrent monitors read consistent monitoring_on */ + smp_rmb(); + return READ_ONCE(monitoring_on); +} + +/* + * monitoring_on general switcher. + */ +static ssize_t monitoring_on_read_data(struct file *filp, char __user *user_buf, + size_t count, loff_t *ppos) +{ + const char *buff; + + buff = rv_monitoring_on() ? "1\n" : "0\n"; + + return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff) + 1); +} + +static void turn_monitoring_off(void) +{ + WRITE_ONCE(monitoring_on, false); + /* Ensures that concurrent monitors read consistent monitoring_on */ + smp_wmb(); +} + +static void reset_all_monitors(void) +{ + struct rv_monitor_def *mdef; + + list_for_each_entry(mdef, &rv_monitors_list, list) { + if (mdef->monitor->enabled) + mdef->monitor->reset(); + } +} + +static void turn_monitoring_on(void) +{ + WRITE_ONCE(monitoring_on, true); + /* Ensures that concurrent monitors read consistent monitoring_on */ + smp_wmb(); +} + +static void turn_monitoring_on_with_reset(void) +{ + lockdep_assert_held(&rv_interface_lock); + + if (rv_monitoring_on()) + return; + + /* + * Monitors might be out of sync with the system if events were not + * processed because of !rv_monitoring_on(). + * + * Reset all monitors, forcing a re-sync. + */ + reset_all_monitors(); + turn_monitoring_on(); +} + +static ssize_t monitoring_on_write_data(struct file *filp, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int retval; + bool val; + + retval = kstrtobool_from_user(user_buf, count, &val); + if (retval) + return retval; + + mutex_lock(&rv_interface_lock); + + if (val) + turn_monitoring_on_with_reset(); + else + turn_monitoring_off(); + + /* + * Wait for the execution of all events to finish + * before returning to user-space. + */ + tracepoint_synchronize_unregister(); + + mutex_unlock(&rv_interface_lock); + + return count; +} + +static const struct file_operations monitoring_on_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = monitoring_on_write_data, + .read = monitoring_on_read_data, +}; + +static void destroy_monitor_dir(struct rv_monitor_def *mdef) +{ + reactor_cleanup_monitor(mdef); + rv_remove(mdef->root_d); +} + +/** + * rv_register_monitor - register a rv monitor. + * @monitor: The rv_monitor to be registered. + * + * Returns 0 if successful, error otherwise. + */ +int rv_register_monitor(struct rv_monitor *monitor) +{ + struct rv_monitor_def *r; + int retval = 0; + + if (strlen(monitor->name) >= MAX_RV_MONITOR_NAME_SIZE) { + pr_info("Monitor %s has a name longer than %d\n", monitor->name, + MAX_RV_MONITOR_NAME_SIZE); + return -1; + } + + mutex_lock(&rv_interface_lock); + + list_for_each_entry(r, &rv_monitors_list, list) { + if (strcmp(monitor->name, r->monitor->name) == 0) { + pr_info("Monitor %s is already registered\n", monitor->name); + retval = -1; + goto out_unlock; + } + } + + r = kzalloc(sizeof(struct rv_monitor_def), GFP_KERNEL); + if (!r) { + retval = -ENOMEM; + goto out_unlock; + } + + r->monitor = monitor; + + retval = create_monitor_dir(r); + if (retval) { + kfree(r); + goto out_unlock; + } + + list_add_tail(&r->list, &rv_monitors_list); + +out_unlock: + mutex_unlock(&rv_interface_lock); + return retval; +} + +/** + * rv_unregister_monitor - unregister a rv monitor. + * @monitor: The rv_monitor to be unregistered. + * + * Returns 0 if successful, error otherwise. + */ +int rv_unregister_monitor(struct rv_monitor *monitor) +{ + struct rv_monitor_def *ptr, *next; + + mutex_lock(&rv_interface_lock); + + list_for_each_entry_safe(ptr, next, &rv_monitors_list, list) { + if (strcmp(monitor->name, ptr->monitor->name) == 0) { + rv_disable_monitor(ptr); + list_del(&ptr->list); + destroy_monitor_dir(ptr); + } + } + + mutex_unlock(&rv_interface_lock); + return 0; +} + +int __init rv_init_interface(void) +{ + struct dentry *tmp; + int retval; + + rv_root.root_dir = rv_create_dir("rv", NULL); + if (!rv_root.root_dir) + goto out_err; + + rv_root.monitors_dir = rv_create_dir("monitors", rv_root.root_dir); + if (!rv_root.monitors_dir) + goto out_err; + + tmp = rv_create_file("available_monitors", RV_MODE_READ, rv_root.root_dir, NULL, + &available_monitors_ops); + if (!tmp) + goto out_err; + + tmp = rv_create_file("enabled_monitors", RV_MODE_WRITE, rv_root.root_dir, NULL, + &enabled_monitors_ops); + if (!tmp) + goto out_err; + + tmp = rv_create_file("monitoring_on", RV_MODE_WRITE, rv_root.root_dir, NULL, + &monitoring_on_fops); + if (!tmp) + goto out_err; + retval = init_rv_reactors(rv_root.root_dir); + if (retval) + goto out_err; + + turn_monitoring_on(); + + return 0; + +out_err: + rv_remove(rv_root.root_dir); + printk(KERN_ERR "RV: Error while creating the RV interface\n"); + return 1; +} diff --git a/kernel/trace/rv/rv.h b/kernel/trace/rv/rv.h new file mode 100644 index 000000000000..db6cb0913dbd --- /dev/null +++ b/kernel/trace/rv/rv.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/mutex.h> + +struct rv_interface { + struct dentry *root_dir; + struct dentry *monitors_dir; +}; + +#include "../trace.h" +#include <linux/tracefs.h> +#include <linux/rv.h> + +#define RV_MODE_WRITE TRACE_MODE_WRITE +#define RV_MODE_READ TRACE_MODE_READ + +#define rv_create_dir tracefs_create_dir +#define rv_create_file tracefs_create_file +#define rv_remove tracefs_remove + +#define MAX_RV_MONITOR_NAME_SIZE 32 +#define MAX_RV_REACTOR_NAME_SIZE 32 + +extern struct mutex rv_interface_lock; + +#ifdef CONFIG_RV_REACTORS +struct rv_reactor_def { + struct list_head list; + struct rv_reactor *reactor; + /* protected by the monitor interface lock */ + int counter; +}; +#endif + +struct rv_monitor_def { + struct list_head list; + struct rv_monitor *monitor; + struct dentry *root_d; +#ifdef CONFIG_RV_REACTORS + struct rv_reactor_def *rdef; + bool reacting; +#endif + bool task_monitor; +}; + +struct dentry *get_monitors_root(void); +int rv_disable_monitor(struct rv_monitor_def *mdef); +int rv_enable_monitor(struct rv_monitor_def *mdef); + +#ifdef CONFIG_RV_REACTORS +int reactor_populate_monitor(struct rv_monitor_def *mdef); +void reactor_cleanup_monitor(struct rv_monitor_def *mdef); +int init_rv_reactors(struct dentry *root_dir); +#else +static inline int reactor_populate_monitor(struct rv_monitor_def *mdef) +{ + return 0; +} + +static inline void reactor_cleanup_monitor(struct rv_monitor_def *mdef) +{ + return; +} + +static inline int init_rv_reactors(struct dentry *root_dir) +{ + return 0; +} +#endif diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c new file mode 100644 index 000000000000..6aae106695b6 --- /dev/null +++ b/kernel/trace/rv/rv_reactors.c @@ -0,0 +1,510 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> + * + * Runtime reactor interface. + * + * A runtime monitor can cause a reaction to the detection of an + * exception on the model's execution. By default, the monitors have + * tracing reactions, printing the monitor output via tracepoints. + * But other reactions can be added (on-demand) via this interface. + * + * == Registering reactors == + * + * The struct rv_reactor defines a callback function to be executed + * in case of a model exception happens. The callback function + * receives a message to be (optionally) printed before executing + * the reaction. + * + * A RV reactor is registered via: + * int rv_register_reactor(struct rv_reactor *reactor) + * And unregistered via: + * int rv_unregister_reactor(struct rv_reactor *reactor) + * + * These functions are exported to modules, enabling reactors to be + * dynamically loaded. + * + * == User interface == + * + * The user interface resembles the kernel tracing interface and + * presents these files: + * + * "available_reactors" + * - List the available reactors, one per line. + * + * For example: + * # cat available_reactors + * nop + * panic + * printk + * + * "reacting_on" + * - It is an on/off general switch for reactors, disabling + * all reactions. + * + * "monitors/MONITOR/reactors" + * - List available reactors, with the select reaction for the given + * MONITOR inside []. The default one is the nop (no operation) + * reactor. + * - Writing the name of an reactor enables it to the given + * MONITOR. + * + * For example: + * # cat monitors/wip/reactors + * [nop] + * panic + * printk + * # echo panic > monitors/wip/reactors + * # cat monitors/wip/reactors + * nop + * [panic] + * printk + */ + +#include <linux/slab.h> + +#include "rv.h" + +/* + * Interface for the reactor register. + */ +static LIST_HEAD(rv_reactors_list); + +static struct rv_reactor_def *get_reactor_rdef_by_name(char *name) +{ + struct rv_reactor_def *r; + + list_for_each_entry(r, &rv_reactors_list, list) { + if (strcmp(name, r->reactor->name) == 0) + return r; + } + return NULL; +} + +/* + * Available reactors seq functions. + */ +static int reactors_show(struct seq_file *m, void *p) +{ + struct rv_reactor_def *rea_def = p; + + seq_printf(m, "%s\n", rea_def->reactor->name); + return 0; +} + +static void reactors_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&rv_interface_lock); +} + +static void *reactors_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&rv_interface_lock); + return seq_list_start(&rv_reactors_list, *pos); +} + +static void *reactors_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &rv_reactors_list, pos); +} + +/* + * available_reactors seq definition. + */ +static const struct seq_operations available_reactors_seq_ops = { + .start = reactors_start, + .next = reactors_next, + .stop = reactors_stop, + .show = reactors_show +}; + +/* + * available_reactors interface. + */ +static int available_reactors_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &available_reactors_seq_ops); +}; + +static const struct file_operations available_reactors_ops = { + .open = available_reactors_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +/* + * Monitor's reactor file. + */ +static int monitor_reactor_show(struct seq_file *m, void *p) +{ + struct rv_monitor_def *mdef = m->private; + struct rv_reactor_def *rdef = p; + + if (mdef->rdef == rdef) + seq_printf(m, "[%s]\n", rdef->reactor->name); + else + seq_printf(m, "%s\n", rdef->reactor->name); + return 0; +} + +/* + * available_reactors seq definition. + */ +static const struct seq_operations monitor_reactors_seq_ops = { + .start = reactors_start, + .next = reactors_next, + .stop = reactors_stop, + .show = monitor_reactor_show +}; + +static void monitor_swap_reactors(struct rv_monitor_def *mdef, struct rv_reactor_def *rdef, + bool reacting) +{ + bool monitor_enabled; + + /* nothing to do */ + if (mdef->rdef == rdef) + return; + + monitor_enabled = mdef->monitor->enabled; + if (monitor_enabled) + rv_disable_monitor(mdef); + + /* swap reactor's usage */ + mdef->rdef->counter--; + rdef->counter++; + + mdef->rdef = rdef; + mdef->reacting = reacting; + mdef->monitor->react = rdef->reactor->react; + + if (monitor_enabled) + rv_enable_monitor(mdef); +} + +static ssize_t +monitor_reactors_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buff[MAX_RV_REACTOR_NAME_SIZE + 2]; + struct rv_monitor_def *mdef; + struct rv_reactor_def *rdef; + struct seq_file *seq_f; + int retval = -EINVAL; + bool enable; + char *ptr; + int len; + + if (count < 1 || count > MAX_RV_REACTOR_NAME_SIZE + 1) + return -EINVAL; + + memset(buff, 0, sizeof(buff)); + + retval = simple_write_to_buffer(buff, sizeof(buff) - 1, ppos, user_buf, count); + if (retval < 0) + return -EFAULT; + + ptr = strim(buff); + + len = strlen(ptr); + if (!len) + return count; + + /* + * See monitor_reactors_open() + */ + seq_f = file->private_data; + mdef = seq_f->private; + + mutex_lock(&rv_interface_lock); + + retval = -EINVAL; + + list_for_each_entry(rdef, &rv_reactors_list, list) { + if (strcmp(ptr, rdef->reactor->name) != 0) + continue; + + if (rdef == get_reactor_rdef_by_name("nop")) + enable = false; + else + enable = true; + + monitor_swap_reactors(mdef, rdef, enable); + + retval = count; + break; + } + + mutex_unlock(&rv_interface_lock); + + return retval; +} + +/* + * available_reactors interface. + */ +static int monitor_reactors_open(struct inode *inode, struct file *file) +{ + struct rv_monitor_def *mdef = inode->i_private; + struct seq_file *seq_f; + int ret; + + ret = seq_open(file, &monitor_reactors_seq_ops); + if (ret < 0) + return ret; + + /* + * seq_open stores the seq_file on the file->private data. + */ + seq_f = file->private_data; + + /* + * Copy the create file "private" data to the seq_file private data. + */ + seq_f->private = mdef; + + return 0; +}; + +static const struct file_operations monitor_reactors_ops = { + .open = monitor_reactors_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = monitor_reactors_write +}; + +static int __rv_register_reactor(struct rv_reactor *reactor) +{ + struct rv_reactor_def *r; + + list_for_each_entry(r, &rv_reactors_list, list) { + if (strcmp(reactor->name, r->reactor->name) == 0) { + pr_info("Reactor %s is already registered\n", reactor->name); + return -EINVAL; + } + } + + r = kzalloc(sizeof(struct rv_reactor_def), GFP_KERNEL); + if (!r) + return -ENOMEM; + + r->reactor = reactor; + r->counter = 0; + + list_add_tail(&r->list, &rv_reactors_list); + + return 0; +} + +/** + * rv_register_reactor - register a rv reactor. + * @reactor: The rv_reactor to be registered. + * + * Returns 0 if successful, error otherwise. + */ +int rv_register_reactor(struct rv_reactor *reactor) +{ + int retval = 0; + + if (strlen(reactor->name) >= MAX_RV_REACTOR_NAME_SIZE) { + pr_info("Reactor %s has a name longer than %d\n", + reactor->name, MAX_RV_MONITOR_NAME_SIZE); + return -EINVAL; + } + + mutex_lock(&rv_interface_lock); + retval = __rv_register_reactor(reactor); + mutex_unlock(&rv_interface_lock); + return retval; +} + +/** + * rv_unregister_reactor - unregister a rv reactor. + * @reactor: The rv_reactor to be unregistered. + * + * Returns 0 if successful, error otherwise. + */ +int rv_unregister_reactor(struct rv_reactor *reactor) +{ + struct rv_reactor_def *ptr, *next; + int ret = 0; + + mutex_lock(&rv_interface_lock); + + list_for_each_entry_safe(ptr, next, &rv_reactors_list, list) { + if (strcmp(reactor->name, ptr->reactor->name) == 0) { + + if (!ptr->counter) { + list_del(&ptr->list); + } else { + printk(KERN_WARNING + "rv: the rv_reactor %s is in use by %d monitor(s)\n", + ptr->reactor->name, ptr->counter); + printk(KERN_WARNING "rv: the rv_reactor %s cannot be removed\n", + ptr->reactor->name); + ret = -EBUSY; + break; + } + } + } + + mutex_unlock(&rv_interface_lock); + return ret; +} + +/* + * reacting_on interface. + */ +static bool __read_mostly reacting_on; + +/** + * rv_reacting_on - checks if reacting is on + * + * Returns 1 if on, 0 otherwise. + */ +bool rv_reacting_on(void) +{ + /* Ensures that concurrent monitors read consistent reacting_on */ + smp_rmb(); + return READ_ONCE(reacting_on); +} + +static ssize_t reacting_on_read_data(struct file *filp, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + char *buff; + + buff = rv_reacting_on() ? "1\n" : "0\n"; + + return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff)+1); +} + +static void turn_reacting_off(void) +{ + WRITE_ONCE(reacting_on, false); + /* Ensures that concurrent monitors read consistent reacting_on */ + smp_wmb(); +} + +static void turn_reacting_on(void) +{ + WRITE_ONCE(reacting_on, true); + /* Ensures that concurrent monitors read consistent reacting_on */ + smp_wmb(); +} + +static ssize_t reacting_on_write_data(struct file *filp, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int retval; + bool val; + + retval = kstrtobool_from_user(user_buf, count, &val); + if (retval) + return retval; + + mutex_lock(&rv_interface_lock); + + if (val) + turn_reacting_on(); + else + turn_reacting_off(); + + /* + * Wait for the execution of all events to finish + * before returning to user-space. + */ + tracepoint_synchronize_unregister(); + + mutex_unlock(&rv_interface_lock); + + return count; +} + +static const struct file_operations reacting_on_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = reacting_on_write_data, + .read = reacting_on_read_data, +}; + +/** + * reactor_populate_monitor - creates per monitor reactors file + * @mdef: monitor's definition. + * + * Returns 0 if successful, error otherwise. + */ +int reactor_populate_monitor(struct rv_monitor_def *mdef) +{ + struct dentry *tmp; + + tmp = rv_create_file("reactors", RV_MODE_WRITE, mdef->root_d, mdef, &monitor_reactors_ops); + if (!tmp) + return -ENOMEM; + + /* + * Configure as the rv_nop reactor. + */ + mdef->rdef = get_reactor_rdef_by_name("nop"); + mdef->rdef->counter++; + mdef->reacting = false; + + return 0; +} + +/** + * reactor_cleanup_monitor - cleanup a monitor reference + * @mdef: monitor's definition. + */ +void reactor_cleanup_monitor(struct rv_monitor_def *mdef) +{ + lockdep_assert_held(&rv_interface_lock); + mdef->rdef->counter--; + WARN_ON_ONCE(mdef->rdef->counter < 0); +} + +/* + * Nop reactor register + */ +static void rv_nop_reaction(char *msg) +{ +} + +static struct rv_reactor rv_nop = { + .name = "nop", + .description = "no-operation reactor: do nothing.", + .react = rv_nop_reaction +}; + +int init_rv_reactors(struct dentry *root_dir) +{ + struct dentry *available, *reacting; + int retval; + + available = rv_create_file("available_reactors", RV_MODE_READ, root_dir, NULL, + &available_reactors_ops); + if (!available) + goto out_err; + + reacting = rv_create_file("reacting_on", RV_MODE_WRITE, root_dir, NULL, &reacting_on_fops); + if (!reacting) + goto rm_available; + + retval = __rv_register_reactor(&rv_nop); + if (retval) + goto rm_reacting; + + turn_reacting_on(); + + return 0; + +rm_reacting: + rv_remove(reacting); +rm_available: + rv_remove(available); +out_err: + return -ENOMEM; +} diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0c517c8c8999..d3005279165d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5569,13 +5569,13 @@ static const char readme_msg[] = #endif #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" - "\t Format: p[:[<group>/]<event>] <place> [<args>]\n" - "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n" + "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" + "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" #ifdef CONFIG_HIST_TRIGGERS "\t s:[synthetic/]<event> <field> [<field>]\n" #endif - "\t e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n" - "\t -:[<group>/]<event>\n" + "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n" + "\t -:[<group>/][<event>]\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" @@ -9101,6 +9101,16 @@ allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size return 0; } +static void free_trace_buffer(struct array_buffer *buf) +{ + if (buf->buffer) { + ring_buffer_free(buf->buffer); + buf->buffer = NULL; + free_percpu(buf->data); + buf->data = NULL; + } +} + static int allocate_trace_buffers(struct trace_array *tr, int size) { int ret; @@ -9113,10 +9123,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) ret = allocate_trace_buffer(tr, &tr->max_buffer, allocate_snapshot ? size : 1); if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { - ring_buffer_free(tr->array_buffer.buffer); - tr->array_buffer.buffer = NULL; - free_percpu(tr->array_buffer.data); - tr->array_buffer.data = NULL; + free_trace_buffer(&tr->array_buffer); return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; @@ -9131,16 +9138,6 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) return 0; } -static void free_trace_buffer(struct array_buffer *buf) -{ - if (buf->buffer) { - ring_buffer_free(buf->buffer); - buf->buffer = NULL; - free_percpu(buf->data); - buf->data = NULL; - } -} - static void free_trace_buffers(struct trace_array *tr) { if (!tr) @@ -9772,6 +9769,8 @@ static __init int tracer_init_tracefs(void) tracer_init_tracefs_work_func(NULL); } + rv_init_interface(); + return 0; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ff816fb41e48..900e75d96c84 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2005,4 +2005,13 @@ struct trace_min_max_param { extern const struct file_operations trace_min_max_fops; +#ifdef CONFIG_RV +extern int rv_init_interface(void); +#else +static inline int rv_init_interface(void) +{ + return 0; +} +#endif + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 076b447a1b88..154996684fb5 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -101,7 +101,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type event = p + 1; *p = '\0'; } - if (event[0] == '\0') { + if (!system && event[0] == '\0') { ret = -EINVAL; goto out; } diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 7d4478525c66..4a0e9d927443 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -125,6 +125,7 @@ static bool eprobe_dyn_event_match(const char *system, const char *event, * We match the following: * event only - match all eprobes with event name * system and event only - match all system/event probes + * system only - match all system probes * * The below has the above satisfied with more arguments: * @@ -143,7 +144,7 @@ static bool eprobe_dyn_event_match(const char *system, const char *event, return false; /* Must match the event name */ - if (strcmp(trace_probe_name(&ep->tp), event) != 0) + if (event[0] != '\0' && strcmp(trace_probe_name(&ep->tp), event) != 0) return false; /* No arguments match all */ @@ -838,8 +839,11 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[ if (ret) return ret; - if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG) + if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG) { ret = trace_eprobe_tp_arg_update(ep, i); + if (ret) + trace_probe_log_err(0, BAD_ATTACH_ARG); + } return ret; } @@ -848,7 +852,7 @@ static int __trace_eprobe_create(int argc, const char *argv[]) { /* * Argument syntax: - * e[:[GRP/]ENAME] SYSTEM.EVENT [FETCHARGS] + * e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] * Fetch args: * <name>=$<field>[:TYPE] */ @@ -858,6 +862,7 @@ static int __trace_eprobe_create(int argc, const char *argv[]) struct trace_eprobe *ep = NULL; char buf1[MAX_EVENT_NAME_LEN]; char buf2[MAX_EVENT_NAME_LEN]; + char gbuf[MAX_EVENT_NAME_LEN]; int ret = 0; int i; @@ -869,25 +874,25 @@ static int __trace_eprobe_create(int argc, const char *argv[]) event = strchr(&argv[0][1], ':'); if (event) { event++; - ret = traceprobe_parse_event_name(&event, &group, buf1, + ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) goto parse_error; - } else { - strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN); - sanitize_event_name(buf1); - event = buf1; } - if (!is_good_name(event) || !is_good_name(group)) - goto parse_error; + trace_probe_log_set_index(1); sys_event = argv[1]; - ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, - sys_event - argv[1]); - if (ret || !sys_name) - goto parse_error; - if (!is_good_name(sys_event) || !is_good_name(sys_name)) + ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0); + if (!sys_event || !sys_name) { + trace_probe_log_err(0, NO_EVENT_INFO); goto parse_error; + } + + if (!event) { + strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN); + sanitize_event_name(buf1); + event = buf1; + } mutex_lock(&event_mutex); event_call = find_and_get_event(sys_name, sys_event); @@ -896,6 +901,8 @@ static int __trace_eprobe_create(int argc, const char *argv[]) if (IS_ERR(ep)) { ret = PTR_ERR(ep); + if (ret == -ENODEV) + trace_probe_log_err(0, BAD_ATTACH_EVENT); /* This must return -ENOMEM or missing event, else there is a bug */ WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV); ep = NULL; diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index e87a46794079..fdf784620c28 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4455,7 +4455,7 @@ static int create_hist_fields(struct hist_trigger_data *hist_data, ret = parse_var_defs(hist_data); if (ret) - goto out; + return ret; ret = create_val_fields(hist_data, file); if (ret) @@ -4466,8 +4466,7 @@ static int create_hist_fields(struct hist_trigger_data *hist_data, goto out; ret = create_key_fields(hist_data, file); - if (ret) - goto out; + out: free_var_defs(hist_data); diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 706e1686b5eb..a6621c52ce45 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -567,7 +567,7 @@ static int user_event_set_call_visible(struct user_event *user, bool visible) * to allow user_event files to be less locked down. The extreme case * being "other" has read/write access to user_events_data/status. * - * When not locked down, processes may not have have permissions to + * When not locked down, processes may not have permissions to * add/remove calls themselves to tracefs. We need to temporarily * switch to root file permission to allow for this scenario. */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a245ea673715..23f7f0ec4f4c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -163,7 +163,8 @@ static bool trace_kprobe_match(const char *system, const char *event, { struct trace_kprobe *tk = to_trace_kprobe(ev); - return strcmp(trace_probe_name(&tk->tp), event) == 0 && + return (event[0] == '\0' || + strcmp(trace_probe_name(&tk->tp), event) == 0) && (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0) && trace_kprobe_match_command_head(tk, argc, argv); } @@ -708,11 +709,11 @@ static int __trace_kprobe_create(int argc, const char *argv[]) /* * Argument syntax: * - Add kprobe: - * p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] + * p[:[GRP/][EVENT]] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] * - Add kretprobe: - * r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS] + * r[MAXACTIVE][:[GRP/][EVENT]] [MOD:]KSYM[+0] [FETCHARGS] * Or - * p:[GRP/]EVENT] [MOD:]KSYM[+0]%return [FETCHARGS] + * p[:[GRP/][EVENT]] [MOD:]KSYM[+0]%return [FETCHARGS] * * Fetch args: * $retval : fetch return value @@ -739,6 +740,7 @@ static int __trace_kprobe_create(int argc, const char *argv[]) long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; + char gbuf[MAX_EVENT_NAME_LEN]; unsigned int flags = TPARG_FL_KERNEL; switch (argv[0][0]) { @@ -833,11 +835,13 @@ static int __trace_kprobe_create(int argc, const char *argv[]) trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf, + ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) goto parse_error; - } else { + } + + if (!event) { /* Make a new event name */ if (symbol) snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 80863c6508e5..850a88abd33b 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -257,6 +257,10 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, } len = strlen(event); if (len == 0) { + if (slash) { + *pevent = NULL; + return 0; + } trace_probe_log_err(offset, NO_EVENT_NAME); return -EINVAL; } else if (len > MAX_EVENT_NAME_LEN) { diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 92cc149af0fd..3b3869ae8cfd 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -442,7 +442,10 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(FAIL_REG_PROBE, "Failed to register probe event"),\ C(DIFF_PROBE_TYPE, "Probe type is different from existing probe"),\ C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"),\ - C(SAME_PROBE, "There is already the exact same probe event"), + C(SAME_PROBE, "There is already the exact same probe event"),\ + C(NO_EVENT_INFO, "This requires both group and event name to attach"),\ + C(BAD_ATTACH_EVENT, "Attached event does not exist"),\ + C(BAD_ATTACH_ARG, "Attached event does not have this field"), #undef C #define C(a, b) TP_ERR_##a diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 88ba5b4bd0c5..fb58e86dd117 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -313,7 +313,8 @@ static bool trace_uprobe_match(const char *system, const char *event, { struct trace_uprobe *tu = to_trace_uprobe(ev); - return strcmp(trace_probe_name(&tu->tp), event) == 0 && + return (event[0] == '\0' || + strcmp(trace_probe_name(&tu->tp), event) == 0) && (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0) && trace_uprobe_match_command_head(tu, argc, argv); } @@ -533,7 +534,7 @@ end: /* * Argument syntax: - * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS] + * - Add uprobe: p|r[:[GRP/][EVENT]] PATH:OFFSET[%return][(REF)] [FETCHARGS] */ static int __trace_uprobe_create(int argc, const char **argv) { @@ -541,6 +542,7 @@ static int __trace_uprobe_create(int argc, const char **argv) const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; char *arg, *filename, *rctr, *rctr_end, *tmp; char buf[MAX_EVENT_NAME_LEN]; + char gbuf[MAX_EVENT_NAME_LEN]; enum probe_print_type ptype; struct path path; unsigned long offset, ref_ctr_offset; @@ -645,11 +647,13 @@ static int __trace_uprobe_create(int argc, const char **argv) /* setup a probe */ trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf, + ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) goto fail_address_parse; - } else { + } + + if (!event) { char *tail; char *ptr; diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index d673ebdd0426..31c8f922651d 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -28,8 +28,6 @@ #endif /* CONFIG_BATMAN_ADV_TRACING */ -#define BATADV_MAX_MSG_LEN 256 - TRACE_EVENT(batadv_dbg, TP_PROTO(struct batadv_priv *bat_priv, @@ -40,16 +38,13 @@ TRACE_EVENT(batadv_dbg, TP_STRUCT__entry( __string(device, bat_priv->soft_iface->name) __string(driver, KBUILD_MODNAME) - __dynamic_array(char, msg, BATADV_MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, bat_priv->soft_iface->name); __assign_str(driver, KBUILD_MODNAME); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - BATADV_MAX_MSG_LEN, - vaf->fmt, - *vaf->va) >= BATADV_MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 40141df09f25..c9dbe9aab7bd 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -24,13 +24,11 @@ DECLARE_EVENT_CLASS(mac80211_msg_event, TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 4d34dc0b0fee..608c4ae3b08a 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -19,9 +19,10 @@ static const char *random_strings[] = { "One ring to rule them all" }; -static void simple_thread_func(int cnt) +static void do_simple_thread_func(int cnt, const char *fmt, ...) { unsigned long bitmask[1] = {0xdeadbeefUL}; + va_list va; int array[6]; int len = cnt % 5; int i; @@ -33,9 +34,13 @@ static void simple_thread_func(int cnt) array[i] = i + 1; array[i] = 0; + va_start(va, fmt); + /* Silly tracepoints */ trace_foo_bar("hello", cnt, array, random_strings[len], - current->cpus_ptr); + current->cpus_ptr, fmt, &va); + + va_end(va); trace_foo_with_template_simple("HELLO", cnt); @@ -48,6 +53,11 @@ static void simple_thread_func(int cnt) trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask); } +static void simple_thread_func(int cnt) +{ + do_simple_thread_func(cnt, "iter=%d", cnt); +} + static int simple_thread(void *arg) { int cnt = 0; diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index cbbbb83beced..1a92226202fc 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -141,6 +141,27 @@ * In most cases, the __assign_str() macro will take the same * parameters as the __string() macro had to declare the string. * + * __vstring: This is similar to __string() but instead of taking a + * dynamic length, it takes a variable list va_list 'va' variable. + * Some event callers already have a message from parameters saved + * in a va_list. Passing in the format and the va_list variable + * will save just enough on the ring buffer for that string. + * Note, the va variable used is a pointer to a va_list, not + * to the va_list directly. + * + * (va_list *va) + * + * __vstring(foo, fmt, va) is similar to: vsnprintf(foo, fmt, va) + * + * To assign the string, use the helper macro __assign_vstr(). + * + * __assign_vstr(foo, fmt, va); + * + * In most cases, the __assign_vstr() macro will take the same + * parameters as the __vstring() macro had to declare the string. + * Use __get_str() to retrieve the __vstring() just like it would for + * __string(). + * * __string_len: This is a helper to a __dynamic_array, but it understands * that the array has characters in it, and with the combined * use of __assign_str_len(), it will allocate 'len' + 1 bytes @@ -256,9 +277,10 @@ TRACE_DEFINE_ENUM(TRACE_SAMPLE_ZOO); TRACE_EVENT(foo_bar, TP_PROTO(const char *foo, int bar, const int *lst, - const char *string, const struct cpumask *mask), + const char *string, const struct cpumask *mask, + const char *fmt, va_list *va), - TP_ARGS(foo, bar, lst, string, mask), + TP_ARGS(foo, bar, lst, string, mask, fmt, va), TP_STRUCT__entry( __array( char, foo, 10 ) @@ -266,6 +288,7 @@ TRACE_EVENT(foo_bar, __dynamic_array(int, list, __length_of(lst)) __string( str, string ) __bitmask( cpus, num_possible_cpus() ) + __vstring( vstr, fmt, va ) ), TP_fast_assign( @@ -274,10 +297,11 @@ TRACE_EVENT(foo_bar, memcpy(__get_dynamic_array(list), lst, __length_of(lst) * sizeof(int)); __assign_str(str, string); + __assign_vstr(vstr, fmt, va); __assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus()); ), - TP_printk("foo %s %d %s %s %s %s (%s)", __entry->foo, __entry->bar, + TP_printk("foo %s %d %s %s %s %s (%s) %s", __entry->foo, __entry->bar, /* * Notice here the use of some helper functions. This includes: @@ -321,7 +345,7 @@ TRACE_EVENT(foo_bar, __print_array(__get_dynamic_array(list), __get_dynamic_array_len(list) / sizeof(int), sizeof(int)), - __get_str(str), __get_bitmask(cpus)) + __get_str(str), __get_bitmask(cpus), __get_str(vstr)) ); /* diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index 7011fbe003ff..438516bdfb3c 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -8,7 +8,7 @@ This script parses a trace provided by the function tracer in kernel/trace/trace_functions.c The resulted trace is processed into a tree to produce a more human view of the call stack by drawing textual but hierarchical tree of -calls. Only the functions's names and the the call time are provided. +calls. Only the functions's names and the call time are provided. Usage: Be sure that you have CONFIG_FUNCTION_TRACER diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc index 60c02b482be8..c300eb020262 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - add/remove eprobe events -# requires: dynamic_events events/syscalls/sys_enter_openat "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README +# requires: dynamic_events events/syscalls/sys_enter_openat "<attached-group>.<attached-event> [<args>]":README echo 0 > events/enable @@ -87,4 +87,11 @@ echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events ! grep -q "$EPROBE" dynamic_events ! test -d events/eprobes/$EPROBE +if grep -q "e\[:\[<group>/]\[<event>]]" README; then + echo "e:mygroup/ $SYSTEM/$EVENT $OPTIONS" >> dynamic_events + test -d events/mygroup + echo "-:mygroup/" >> dynamic_events + ! test -d events/mygroup +fi + clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index b4da41d126d5..13d43f40a6fc 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -23,4 +23,11 @@ grep -q myevent1 dynamic_events echo > dynamic_events +if grep -q "p\[:\[<group>/]\[<event>]]" README; then + echo "p:mygroup/ $PLACE" >> dynamic_events + test -d events/mygroup + echo "-:mygroup/" >> dynamic_events + ! test -d events/mygroup +fi + clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index fa928b431555..9e85d3019ff0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -21,7 +21,7 @@ check_error 'p:^/bar vfs_read' # NO_GROUP_NAME check_error 'p:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG check_error 'p:^foo.1/bar vfs_read' # BAD_GROUP_NAME -check_error 'p:foo/^ vfs_read' # NO_EVENT_NAME +check_error 'p:^ vfs_read' # NO_EVENT_NAME check_error 'p:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG check_error 'p:foo/^bar.1 vfs_read' # BAD_EVENT_NAME diff --git a/tools/verification/dot2/Makefile b/tools/verification/dot2/Makefile new file mode 100644 index 000000000000..021beb07a521 --- /dev/null +++ b/tools/verification/dot2/Makefile @@ -0,0 +1,26 @@ +INSTALL=install + +prefix ?= /usr +bindir ?= $(prefix)/bin +mandir ?= $(prefix)/share/man +miscdir ?= $(prefix)/share/dot2 +srcdir ?= $(prefix)/src + +PYLIB ?= $(shell python3 -c 'import sysconfig; print (sysconfig.get_path("purelib"))') + +.PHONY: all +all: + +.PHONY: clean +clean: + +.PHONY: install +install: + $(INSTALL) automata.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/automata.py + $(INSTALL) dot2c.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/dot2c.py + $(INSTALL) dot2c -D -m 755 $(DESTDIR)$(bindir)/ + $(INSTALL) dot2k.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/dot2k.py + $(INSTALL) dot2k -D -m 755 $(DESTDIR)$(bindir)/ + + mkdir -p ${miscdir}/ + cp -rp dot2k_templates $(DESTDIR)$(miscdir)/ diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py new file mode 100644 index 000000000000..baffeb960ff0 --- /dev/null +++ b/tools/verification/dot2/automata.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# Automata object: parse an automata in dot file digraph format into a python object +# +# For further information, see: +# Documentation/trace/rv/deterministic_automata.rst + +import ntpath + +class Automata: + """Automata class: Reads a dot file and part it as an automata. + + Attributes: + dot_file: A dot file with an state_automaton definition. + """ + + invalid_state_str = "INVALID_STATE" + + def __init__(self, file_path): + self.__dot_path = file_path + self.name = self.__get_model_name() + self.__dot_lines = self.__open_dot() + self.states, self.initial_state, self.final_states = self.__get_state_variables() + self.events = self.__get_event_variables() + self.function = self.__create_matrix() + + def __get_model_name(self): + basename = ntpath.basename(self.__dot_path) + if basename.endswith(".dot") == False: + print("not a dot file") + raise Exception("not a dot file: %s" % self.__dot_path) + + model_name = basename[0:-4] + if model_name.__len__() == 0: + raise Exception("not a dot file: %s" % self.__dot_path) + + return model_name + + def __open_dot(self): + cursor = 0 + dot_lines = [] + try: + dot_file = open(self.__dot_path) + except: + raise Exception("Cannot open the file: %s" % self.__dot_path) + + dot_lines = dot_file.read().splitlines() + dot_file.close() + + # checking the first line: + line = dot_lines[cursor].split() + + if (line[0] != "digraph") and (line[1] != "state_automaton"): + raise Exception("Not a valid .dot format: %s" % self.__dot_path) + else: + cursor += 1 + return dot_lines + + def __get_cursor_begin_states(self): + cursor = 0 + while self.__dot_lines[cursor].split()[0] != "{node": + cursor += 1 + return cursor + + def __get_cursor_begin_events(self): + cursor = 0 + while self.__dot_lines[cursor].split()[0] != "{node": + cursor += 1 + while self.__dot_lines[cursor].split()[0] == "{node": + cursor += 1 + # skip initial state transition + cursor += 1 + return cursor + + def __get_state_variables(self): + # wait for node declaration + states = [] + final_states = [] + + has_final_states = False + cursor = self.__get_cursor_begin_states() + + # process nodes + while self.__dot_lines[cursor].split()[0] == "{node": + line = self.__dot_lines[cursor].split() + raw_state = line[-1] + + # "enabled_fired"}; -> enabled_fired + state = raw_state.replace('"', '').replace('};', '').replace(',','_') + if state[0:7] == "__init_": + initial_state = state[7:] + else: + states.append(state) + if self.__dot_lines[cursor].__contains__("doublecircle") == True: + final_states.append(state) + has_final_states = True + + if self.__dot_lines[cursor].__contains__("ellipse") == True: + final_states.append(state) + has_final_states = True + + cursor += 1 + + states = sorted(set(states)) + states.remove(initial_state) + + # Insert the initial state at the bein og the states + states.insert(0, initial_state) + + if has_final_states == False: + final_states.append(initial_state) + + return states, initial_state, final_states + + def __get_event_variables(self): + # here we are at the begin of transitions, take a note, we will return later. + cursor = self.__get_cursor_begin_events() + + events = [] + while self.__dot_lines[cursor][1] == '"': + # transitions have the format: + # "all_fired" -> "both_fired" [ label = "disable_irq" ]; + # ------------ event is here ------------^^^^^ + if self.__dot_lines[cursor].split()[1] == "->": + line = self.__dot_lines[cursor].split() + event = line[-2].replace('"','') + + # when a transition has more than one lables, they are like this + # "local_irq_enable\nhw_local_irq_enable_n" + # so split them. + + event = event.replace("\\n", " ") + for i in event.split(): + events.append(i) + cursor += 1 + + return sorted(set(events)) + + def __create_matrix(self): + # transform the array into a dictionary + events = self.events + states = self.states + events_dict = {} + states_dict = {} + nr_event = 0 + for event in events: + events_dict[event] = nr_event + nr_event += 1 + + nr_state = 0 + for state in states: + states_dict[state] = nr_state + nr_state += 1 + + # declare the matrix.... + matrix = [[ self.invalid_state_str for x in range(nr_event)] for y in range(nr_state)] + + # and we are back! Let's fill the matrix + cursor = self.__get_cursor_begin_events() + + while self.__dot_lines[cursor][1] == '"': + if self.__dot_lines[cursor].split()[1] == "->": + line = self.__dot_lines[cursor].split() + origin_state = line[0].replace('"','').replace(',','_') + dest_state = line[2].replace('"','').replace(',','_') + possible_events = line[-2].replace('"','').replace("\\n", " ") + for event in possible_events.split(): + matrix[states_dict[origin_state]][events_dict[event]] = dest_state + cursor += 1 + + return matrix diff --git a/tools/verification/dot2/dot2c b/tools/verification/dot2/dot2c new file mode 100644 index 000000000000..3fe89ab88b65 --- /dev/null +++ b/tools/verification/dot2/dot2c @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# dot2c: parse an automata in dot file digraph format into a C +# +# This program was written in the development of this paper: +# de Oliveira, D. B. and Cucinotta, T. and de Oliveira, R. S. +# "Efficient Formal Verification for the Linux Kernel." International +# Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. +# +# For further information, see: +# Documentation/trace/rv/deterministic_automata.rst + +if __name__ == '__main__': + from dot2 import dot2c + import argparse + import sys + + parser = argparse.ArgumentParser(description='dot2c: converts a .dot file into a C structure') + parser.add_argument('dot_file', help='The dot file to be converted') + + args = parser.parse_args() + d = dot2c.Dot2c(args.dot_file) + d.print_model_classic() diff --git a/tools/verification/dot2/dot2c.py b/tools/verification/dot2/dot2c.py new file mode 100644 index 000000000000..fa73353f7e56 --- /dev/null +++ b/tools/verification/dot2/dot2c.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# dot2c: parse an automata in dot file digraph format into a C +# +# This program was written in the development of this paper: +# de Oliveira, D. B. and Cucinotta, T. and de Oliveira, R. S. +# "Efficient Formal Verification for the Linux Kernel." International +# Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. +# +# For further information, see: +# Documentation/trace/rv/deterministic_automata.rst + +from dot2.automata import Automata + +class Dot2c(Automata): + enum_suffix = "" + enum_states_def = "states" + enum_events_def = "events" + struct_automaton_def = "automaton" + var_automaton_def = "aut" + + def __init__(self, file_path): + super().__init__(file_path) + self.line_length = 100 + + def __buff_to_string(self, buff): + string = "" + + for line in buff: + string = string + line + "\n" + + # cut off the last \n + return string[:-1] + + def __get_enum_states_content(self): + buff = [] + buff.append("\t%s%s = 0," % (self.initial_state, self.enum_suffix)) + for state in self.states: + if state != self.initial_state: + buff.append("\t%s%s," % (state, self.enum_suffix)) + buff.append("\tstate_max%s" % (self.enum_suffix)) + + return buff + + def get_enum_states_string(self): + buff = self.__get_enum_states_content() + return self.__buff_to_string(buff) + + def format_states_enum(self): + buff = [] + buff.append("enum %s {" % self.enum_states_def) + buff.append(self.get_enum_states_string()) + buff.append("};\n") + + return buff + + def __get_enum_events_content(self): + buff = [] + first = True + for event in self.events: + if first: + buff.append("\t%s%s = 0," % (event, self.enum_suffix)) + first = False + else: + buff.append("\t%s%s," % (event, self.enum_suffix)) + + buff.append("\tevent_max%s" % self.enum_suffix) + + return buff + + def get_enum_events_string(self): + buff = self.__get_enum_events_content() + return self.__buff_to_string(buff) + + def format_events_enum(self): + buff = [] + buff.append("enum %s {" % self.enum_events_def) + buff.append(self.get_enum_events_string()) + buff.append("};\n") + + return buff + + def get_minimun_type(self): + min_type = "unsigned char" + + if self.states.__len__() > 255: + min_type = "unsigned short" + + if self.states.__len__() > 65535: + min_type = "unsigned int" + + if self.states.__len__() > 1000000: + raise Exception("Too many states: %d" % self.states.__len__()) + + return min_type + + def format_automaton_definition(self): + min_type = self.get_minimun_type() + buff = [] + buff.append("struct %s {" % self.struct_automaton_def) + buff.append("\tchar *state_names[state_max%s];" % (self.enum_suffix)) + buff.append("\tchar *event_names[event_max%s];" % (self.enum_suffix)) + buff.append("\t%s function[state_max%s][event_max%s];" % (min_type, self.enum_suffix, self.enum_suffix)) + buff.append("\t%s initial_state;" % min_type) + buff.append("\tbool final_states[state_max%s];" % (self.enum_suffix)) + buff.append("};\n") + return buff + + def format_aut_init_header(self): + buff = [] + buff.append("struct %s %s = {" % (self.struct_automaton_def, self.var_automaton_def)) + return buff + + def __get_string_vector_per_line_content(self, buff): + first = True + string = "" + for entry in buff: + if first: + string = string + "\t\t\"" + entry + first = False; + else: + string = string + "\",\n\t\t\"" + entry + string = string + "\"" + + return string + + def get_aut_init_events_string(self): + return self.__get_string_vector_per_line_content(self.events) + + def get_aut_init_states_string(self): + return self.__get_string_vector_per_line_content(self.states) + + def format_aut_init_events_string(self): + buff = [] + buff.append("\t.event_names = {") + buff.append(self.get_aut_init_events_string()) + buff.append("\t},") + return buff + + def format_aut_init_states_string(self): + buff = [] + buff.append("\t.state_names = {") + buff.append(self.get_aut_init_states_string()) + buff.append("\t},") + + return buff + + def __get_max_strlen_of_states(self): + max_state_name = max(self.states, key = len).__len__() + return max(max_state_name, self.invalid_state_str.__len__()) + + def __get_state_string_length(self): + maxlen = self.__get_max_strlen_of_states() + self.enum_suffix.__len__() + return "%" + str(maxlen) + "s" + + def get_aut_init_function(self): + nr_states = self.states.__len__() + nr_events = self.events.__len__() + buff = [] + + strformat = self.__get_state_string_length() + + for x in range(nr_states): + line = "\t\t{ " + for y in range(nr_events): + next_state = self.function[x][y] + if next_state != self.invalid_state_str: + next_state = self.function[x][y] + self.enum_suffix + + if y != nr_events-1: + line = line + strformat % next_state + ", " + else: + line = line + strformat % next_state + " }," + buff.append(line) + + return self.__buff_to_string(buff) + + def format_aut_init_function(self): + buff = [] + buff.append("\t.function = {") + buff.append(self.get_aut_init_function()) + buff.append("\t},") + + return buff + + def get_aut_init_initial_state(self): + return self.initial_state + + def format_aut_init_initial_state(self): + buff = [] + initial_state = self.get_aut_init_initial_state() + buff.append("\t.initial_state = " + initial_state + self.enum_suffix + ",") + + return buff + + def get_aut_init_final_states(self): + line = "" + first = True + for state in self.states: + if first == False: + line = line + ', ' + else: + first = False + + if self.final_states.__contains__(state): + line = line + '1' + else: + line = line + '0' + return line + + def format_aut_init_final_states(self): + buff = [] + buff.append("\t.final_states = { %s }," % self.get_aut_init_final_states()) + + return buff + + def __get_automaton_initialization_footer_string(self): + footer = "};\n" + return footer + + def format_aut_init_footer(self): + buff = [] + buff.append(self.__get_automaton_initialization_footer_string()) + + return buff + + def format_invalid_state(self): + buff = [] + buff.append("#define %s state_max%s\n" % (self.invalid_state_str, self.enum_suffix)) + + return buff + + def format_model(self): + buff = [] + buff += self.format_states_enum() + buff += self.format_invalid_state() + buff += self.format_events_enum() + buff += self.format_automaton_definition() + buff += self.format_aut_init_header() + buff += self.format_aut_init_states_string() + buff += self.format_aut_init_events_string() + buff += self.format_aut_init_function() + buff += self.format_aut_init_initial_state() + buff += self.format_aut_init_final_states() + buff += self.format_aut_init_footer() + + return buff + + def print_model_classic(self): + buff = self.format_model() + print(self.__buff_to_string(buff)) diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k new file mode 100644 index 000000000000..9dcd38abe20a --- /dev/null +++ b/tools/verification/dot2/dot2k @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# dot2k: transform dot files into a monitor for the Linux kernel. +# +# For further information, see: +# Documentation/trace/rv/da_monitor_synthesis.rst + +if __name__ == '__main__': + from dot2.dot2k import dot2k + import argparse + import ntpath + import os + import platform + import sys + import sys + import argparse + + parser = argparse.ArgumentParser(description='transform .dot file into kernel rv monitor') + parser.add_argument('-d', "--dot", dest="dot_file", required=True) + parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=True) + parser.add_argument('-n', "--model_name", dest="model_name", required=False) + parser.add_argument("-D", "--description", dest="description", required=False) + params = parser.parse_args() + + print("Opening and parsing the dot file %s" % params.dot_file) + try: + monitor=dot2k(params.dot_file, params.monitor_type) + except Exception as e: + print('Error: '+ str(e)) + print("Sorry : :-(") + sys.exit(1) + + # easier than using argparse action. + if params.model_name != None: + print(params.model_name) + + print("Writing the monitor into the directory %s" % monitor.name) + monitor.print_files() + print("Almost done, checklist") + print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name)) + print(" - Edit include/trace/events/rv.h to add the tracepoint entry") + print(" - Move it to the kernel's monitor directory") + print(" - Edit kernel/trace/rv/Makefile") + print(" - Edit kernel/trace/rv/Kconfig") diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py new file mode 100644 index 000000000000..016550fccf1f --- /dev/null +++ b/tools/verification/dot2/dot2k.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# dot2k: transform dot files into a monitor for the Linux kernel. +# +# For further information, see: +# Documentation/trace/rv/da_monitor_synthesis.rst + +from dot2.dot2c import Dot2c +import platform +import os + +class dot2k(Dot2c): + monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 } + monitor_templates_dir = "dot2k/rv_templates/" + monitor_type = "per_cpu" + + def __init__(self, file_path, MonitorType): + super().__init__(file_path) + + self.monitor_type = self.monitor_types.get(MonitorType) + if self.monitor_type == None: + raise Exception("Unknown monitor type: %s" % MonitorType) + + self.monitor_type = MonitorType + self.__fill_rv_templates_dir() + self.main_c = self.__open_file(self.monitor_templates_dir + "main_" + MonitorType + ".c") + self.enum_suffix = "_%s" % self.name + + def __fill_rv_templates_dir(self): + + if os.path.exists(self.monitor_templates_dir) == True: + return + + if platform.system() != "Linux": + raise Exception("I can only run on Linux.") + + kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release()) + + if os.path.exists(kernel_path) == True: + self.monitor_templates_dir = kernel_path + return + + if os.path.exists("/usr/share/dot2/dot2k_templates/") == True: + self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/" + return + + raise Exception("Could not find the template directory, do you have the kernel source installed?") + + + def __open_file(self, path): + try: + fd = open(path) + except OSError: + raise Exception("Cannot open the file: %s" % path) + + content = fd.read() + + return content + + def __buff_to_string(self, buff): + string = "" + + for line in buff: + string = string + line + "\n" + + # cut off the last \n + return string[:-1] + + def fill_tracepoint_handlers_skel(self): + buff = [] + for event in self.events: + buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event) + buff.append("{") + if self.monitor_type == "per_task": + buff.append("\tstruct task_struct *p = /* XXX: how do I get p? */;"); + buff.append("\tda_handle_event_%s(p, %s%s);" % (self.name, event, self.enum_suffix)); + else: + buff.append("\tda_handle_event_%s(%s%s);" % (self.name, event, self.enum_suffix)); + buff.append("}") + buff.append("") + return self.__buff_to_string(buff) + + def fill_tracepoint_attach_probe(self): + buff = [] + for event in self.events: + buff.append("\trv_attach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) + return self.__buff_to_string(buff) + + def fill_tracepoint_detach_helper(self): + buff = [] + for event in self.events: + buff.append("\trv_detach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) + return self.__buff_to_string(buff) + + def fill_main_c(self): + main_c = self.main_c + min_type = self.get_minimun_type() + nr_events = self.events.__len__() + tracepoint_handlers = self.fill_tracepoint_handlers_skel() + tracepoint_attach = self.fill_tracepoint_attach_probe() + tracepoint_detach = self.fill_tracepoint_detach_helper() + + main_c = main_c.replace("MIN_TYPE", min_type) + main_c = main_c.replace("MODEL_NAME", self.name) + main_c = main_c.replace("NR_EVENTS", str(nr_events)) + main_c = main_c.replace("TRACEPOINT_HANDLERS_SKEL", tracepoint_handlers) + main_c = main_c.replace("TRACEPOINT_ATTACH", tracepoint_attach) + main_c = main_c.replace("TRACEPOINT_DETACH", tracepoint_detach) + + return main_c + + def fill_model_h_header(self): + buff = [] + buff.append("/*") + buff.append(" * Automatically generated C representation of %s automaton" % (self.name)) + buff.append(" * For further information about this format, see kernel documentation:") + buff.append(" * Documentation/trace/rv/deterministic_automata.rst") + buff.append(" */") + buff.append("") + + return buff + + def fill_model_h(self): + # + # Adjust the definition names + # + self.enum_states_def = "states_%s" % self.name + self.enum_events_def = "events_%s" % self.name + self.struct_automaton_def = "automaton_%s" % self.name + self.var_automaton_def = "automaton_%s" % self.name + + buff = self.fill_model_h_header() + buff += self.format_model() + + return self.__buff_to_string(buff) + + def __create_directory(self): + try: + os.mkdir(self.name) + except FileExistsError: + return + except: + print("Fail creating the output dir: %s" % self.name) + + def __create_file(self, file_name, content): + path = "%s/%s" % (self.name, file_name) + try: + file = open(path, 'w') + except FileExistsError: + return + except: + print("Fail creating file: %s" % path) + + file.write(content) + + file.close() + + def __get_main_name(self): + path = "%s/%s" % (self.name, "main.c") + if os.path.exists(path) == False: + return "main.c" + return "__main.c" + + def print_files(self): + main_c = self.fill_main_c() + model_h = self.fill_model_h() + + self.__create_directory() + + path = "%s.c" % self.name + self.__create_file(path, main_c) + + path = "%s.h" % self.name + self.__create_file(path, model_h) diff --git a/tools/verification/dot2/dot2k_templates/main_global.c b/tools/verification/dot2/dot2k_templates/main_global.c new file mode 100644 index 000000000000..f4b712dbc92e --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/main_global.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "MODEL_NAME" + +/* + * XXX: include required tracepoint headers, e.g., + * #include <trace/events/sched.h> + */ +#include <trace/events/rv.h> + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "MODEL_NAME.h" + +/* + * Declare the deterministic automata monitor. + * + * The rv monitor reference is needed for the monitor declaration. + */ +struct rv_monitor rv_MODEL_NAME; +DECLARE_DA_MON_GLOBAL(MODEL_NAME, MIN_TYPE); + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ +TRACEPOINT_HANDLERS_SKEL +static int enable_MODEL_NAME(void) +{ + int retval; + + retval = da_monitor_init_MODEL_NAME(); + if (retval) + return retval; + +TRACEPOINT_ATTACH + + return 0; +} + +static void disable_MODEL_NAME(void) +{ + rv_MODEL_NAME.enabled = 0; + +TRACEPOINT_DETACH + + da_monitor_destroy_MODEL_NAME(); +} + +/* + * This is the monitor register section. + */ +struct rv_monitor rv_MODEL_NAME = { + .name = "MODEL_NAME", + .description = "auto-generated MODEL_NAME", + .enable = enable_MODEL_NAME, + .disable = disable_MODEL_NAME, + .reset = da_monitor_reset_all_MODEL_NAME, + .enabled = 0, +}; + +static int register_MODEL_NAME(void) +{ + rv_register_monitor(&rv_MODEL_NAME); + return 0; +} + +static void unregister_MODEL_NAME(void) +{ + rv_unregister_monitor(&rv_MODEL_NAME); +} + +module_init(register_MODEL_NAME); +module_exit(unregister_MODEL_NAME); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("dot2k: auto-generated"); +MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/main_per_cpu.c b/tools/verification/dot2/dot2k_templates/main_per_cpu.c new file mode 100644 index 000000000000..4080d1ca3354 --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/main_per_cpu.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "MODEL_NAME" + +/* + * XXX: include required tracepoint headers, e.g., + * #include <linux/trace/events/sched.h> + */ +#include <trace/events/rv.h> + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "MODEL_NAME.h" + +/* + * Declare the deterministic automata monitor. + * + * The rv monitor reference is needed for the monitor declaration. + */ +struct rv_monitor rv_MODEL_NAME; +DECLARE_DA_MON_PER_CPU(MODEL_NAME, MIN_TYPE); + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ +TRACEPOINT_HANDLERS_SKEL +static int enable_MODEL_NAME(void) +{ + int retval; + + retval = da_monitor_init_MODEL_NAME(); + if (retval) + return retval; + +TRACEPOINT_ATTACH + + return 0; +} + +static void disable_MODEL_NAME(void) +{ + rv_MODEL_NAME.enabled = 0; + +TRACEPOINT_DETACH + + da_monitor_destroy_MODEL_NAME(); +} + +/* + * This is the monitor register section. + */ +struct rv_monitor rv_MODEL_NAME = { + .name = "MODEL_NAME", + .description = "auto-generated MODEL_NAME", + .enable = enable_MODEL_NAME, + .disable = disable_MODEL_NAME, + .reset = da_monitor_reset_all_MODEL_NAME, + .enabled = 0, +}; + +static int register_MODEL_NAME(void) +{ + rv_register_monitor(&rv_MODEL_NAME); + return 0; +} + +static void unregister_MODEL_NAME(void) +{ + rv_unregister_monitor(&rv_MODEL_NAME); +} + +module_init(register_MODEL_NAME); +module_exit(unregister_MODEL_NAME); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("dot2k: auto-generated"); +MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/main_per_task.c b/tools/verification/dot2/dot2k_templates/main_per_task.c new file mode 100644 index 000000000000..89197175384f --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/main_per_task.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "MODEL_NAME" + +/* + * XXX: include required tracepoint headers, e.g., + * #include <linux/trace/events/sched.h> + */ +#include <trace/events/rv.h> + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "MODEL_NAME.h" + +/* + * Declare the deterministic automata monitor. + * + * The rv monitor reference is needed for the monitor declaration. + */ +struct rv_monitor rv_MODEL_NAME; +DECLARE_DA_MON_PER_TASK(MODEL_NAME, MIN_TYPE); + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ +TRACEPOINT_HANDLERS_SKEL +static int enable_MODEL_NAME(void) +{ + int retval; + + retval = da_monitor_init_MODEL_NAME(); + if (retval) + return retval; + +TRACEPOINT_ATTACH + + return 0; +} + +static void disable_MODEL_NAME(void) +{ + rv_MODEL_NAME.enabled = 0; + +TRACEPOINT_DETACH + + da_monitor_destroy_MODEL_NAME(); +} + +/* + * This is the monitor register section. + */ +struct rv_monitor rv_MODEL_NAME = { + .name = "MODEL_NAME", + .description = "auto-generated MODEL_NAME", + .enable = enable_MODEL_NAME, + .disable = disable_MODEL_NAME, + .reset = da_monitor_reset_all_MODEL_NAME, + .enabled = 0, +}; + +static int register_MODEL_NAME(void) +{ + rv_register_monitor(&rv_MODEL_NAME); + return 0; +} + +static void unregister_MODEL_NAME(void) +{ + rv_unregister_monitor(&rv_MODEL_NAME); +} + +module_init(register_MODEL_NAME); +module_exit(unregister_MODEL_NAME); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("dot2k: auto-generated"); +MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/models/wip.dot b/tools/verification/models/wip.dot new file mode 100644 index 000000000000..2a53a9700a89 --- /dev/null +++ b/tools/verification/models/wip.dot @@ -0,0 +1,16 @@ +digraph state_automaton { + {node [shape = circle] "non_preemptive"}; + {node [shape = plaintext, style=invis, label=""] "__init_preemptive"}; + {node [shape = doublecircle] "preemptive"}; + {node [shape = circle] "preemptive"}; + "__init_preemptive" -> "preemptive"; + "non_preemptive" [label = "non_preemptive"]; + "non_preemptive" -> "non_preemptive" [ label = "sched_waking" ]; + "non_preemptive" -> "preemptive" [ label = "preempt_enable" ]; + "preemptive" [label = "preemptive"]; + "preemptive" -> "non_preemptive" [ label = "preempt_disable" ]; + { rank = min ; + "__init_preemptive"; + "preemptive"; + } +} diff --git a/tools/verification/models/wwnr.dot b/tools/verification/models/wwnr.dot new file mode 100644 index 000000000000..1b206e83129c --- /dev/null +++ b/tools/verification/models/wwnr.dot @@ -0,0 +1,16 @@ +digraph state_automaton { + {node [shape = plaintext, style=invis, label=""] "__init_not_running"}; + {node [shape = ellipse] "not_running"}; + {node [shape = plaintext] "not_running"}; + {node [shape = plaintext] "running"}; + "__init_not_running" -> "not_running"; + "not_running" [label = "not_running", color = green3]; + "not_running" -> "not_running" [ label = "wakeup" ]; + "not_running" -> "running" [ label = "switch_in" ]; + "running" [label = "running"]; + "running" -> "not_running" [ label = "switch_out" ]; + { rank = min ; + "__init_not_running"; + "not_running"; + } +} |