summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/gpio/gpio-event-mon.c146
-rw-r--r--tools/gpio/gpio-hammer.c56
-rw-r--r--tools/gpio/gpio-utils.c176
-rw-r--r--tools/gpio/gpio-utils.h48
-rw-r--r--tools/gpio/gpio-watch.c16
-rw-r--r--tools/gpio/lsgpio.c60
-rw-r--r--tools/include/linux/static_call_types.h35
-rw-r--r--tools/include/uapi/asm-generic/unistd.h14
-rw-r--r--tools/memory-model/Documentation/cheatsheet.txt33
-rw-r--r--tools/memory-model/Documentation/litmus-tests.txt1074
-rw-r--r--tools/memory-model/Documentation/recipes.txt4
-rw-r--r--tools/memory-model/Documentation/references.txt2
-rw-r--r--tools/memory-model/Documentation/simple.txt271
-rw-r--r--tools/memory-model/README160
-rw-r--r--tools/objtool/check.c193
-rw-r--r--tools/objtool/check.h1
-rw-r--r--tools/objtool/elf.c8
-rw-r--r--tools/objtool/elf.h3
-rw-r--r--tools/objtool/objtool.h1
-rw-r--r--tools/objtool/orc_gen.c4
-rwxr-xr-xtools/objtool/sync-check.sh1
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl12
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl12
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl10
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py2
-rw-r--r--tools/testing/selftests/rseq/param_test.c223
-rw-r--r--tools/testing/selftests/rseq/rseq-x86.h57
-rwxr-xr-xtools/testing/selftests/rseq/run_param_test.sh2
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c68
29 files changed, 2377 insertions, 315 deletions
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 1a303a81aeef..90c3155f05b1 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -23,17 +23,17 @@
#include <sys/ioctl.h>
#include <sys/types.h>
#include <linux/gpio.h>
+#include "gpio-utils.h"
int monitor_device(const char *device_name,
- unsigned int line,
- uint32_t handleflags,
- uint32_t eventflags,
+ unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
unsigned int loops)
{
- struct gpioevent_request req;
- struct gpiohandle_data data;
+ struct gpio_v2_line_values values;
char *chrdev_name;
- int fd;
+ int cfd, lfd;
int ret;
int i = 0;
@@ -41,44 +41,55 @@ int monitor_device(const char *device_name,
if (ret < 0)
return -ENOMEM;
- fd = open(chrdev_name, 0);
- if (fd == -1) {
+ cfd = open(chrdev_name, 0);
+ if (cfd == -1) {
ret = -errno;
fprintf(stderr, "Failed to open %s\n", chrdev_name);
goto exit_free_name;
}
- req.lineoffset = line;
- req.handleflags = handleflags;
- req.eventflags = eventflags;
- strcpy(req.consumer_label, "gpio-event-mon");
-
- ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req);
- if (ret == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to issue GET EVENT "
- "IOCTL (%d)\n",
- ret);
- goto exit_close_error;
- }
+ ret = gpiotools_request_line(device_name, lines, num_lines, config,
+ "gpio-event-mon");
+ if (ret < 0)
+ goto exit_device_close;
+ else
+ lfd = ret;
/* Read initial states */
- ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data);
- if (ret == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE "
- "VALUES IOCTL (%d)\n",
+ values.mask = 0;
+ values.bits = 0;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&values.mask, i);
+ ret = gpiotools_get_values(lfd, &values);
+ if (ret < 0) {
+ fprintf(stderr,
+ "Failed to issue GPIO LINE GET VALUES IOCTL (%d)\n",
ret);
- goto exit_close_error;
+ goto exit_line_close;
}
- fprintf(stdout, "Monitoring line %d on %s\n", line, device_name);
- fprintf(stdout, "Initial line value: %d\n", data.values[0]);
+ if (num_lines == 1) {
+ fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name);
+ fprintf(stdout, "Initial line value: %d\n",
+ gpiotools_test_bit(values.bits, 0));
+ } else {
+ fprintf(stdout, "Monitoring lines %d", lines[0]);
+ for (i = 1; i < num_lines - 1; i++)
+ fprintf(stdout, ", %d", lines[i]);
+ fprintf(stdout, " and %d on %s\n", lines[i], device_name);
+ fprintf(stdout, "Initial line values: %d",
+ gpiotools_test_bit(values.bits, 0));
+ for (i = 1; i < num_lines - 1; i++)
+ fprintf(stdout, ", %d",
+ gpiotools_test_bit(values.bits, i));
+ fprintf(stdout, " and %d\n",
+ gpiotools_test_bit(values.bits, i));
+ }
while (1) {
- struct gpioevent_data event;
+ struct gpio_v2_line_event event;
- ret = read(req.fd, &event, sizeof(event));
+ ret = read(lfd, &event, sizeof(event));
if (ret == -1) {
if (errno == -EAGAIN) {
fprintf(stderr, "nothing available\n");
@@ -96,12 +107,14 @@ int monitor_device(const char *device_name,
ret = -EIO;
break;
}
- fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp);
+ fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ",
+ event.timestamp_ns, event.offset, event.line_seqno,
+ event.seqno);
switch (event.id) {
- case GPIOEVENT_EVENT_RISING_EDGE:
+ case GPIO_V2_LINE_EVENT_RISING_EDGE:
fprintf(stdout, "rising edge");
break;
- case GPIOEVENT_EVENT_FALLING_EDGE:
+ case GPIO_V2_LINE_EVENT_FALLING_EDGE:
fprintf(stdout, "falling edge");
break;
default:
@@ -114,8 +127,11 @@ int monitor_device(const char *device_name,
break;
}
-exit_close_error:
- if (close(fd) == -1)
+exit_line_close:
+ if (close(lfd) == -1)
+ perror("Failed to close line file");
+exit_device_close:
+ if (close(cfd) == -1)
perror("Failed to close GPIO character device file");
exit_free_name:
free(chrdev_name);
@@ -127,29 +143,37 @@ void print_usage(void)
fprintf(stderr, "Usage: gpio-event-mon [options]...\n"
"Listen to events on GPIO lines, 0->1 1->0\n"
" -n <name> Listen on GPIOs on a named device (must be stated)\n"
- " -o <n> Offset to monitor\n"
+ " -o <n> Offset of line to monitor (may be repeated)\n"
" -d Set line as open drain\n"
" -s Set line as open source\n"
" -r Listen for rising edges\n"
" -f Listen for falling edges\n"
+ " -b <n> Debounce the line with period n microseconds\n"
" [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n"
" -? This helptext\n"
"\n"
"Example:\n"
- "gpio-event-mon -n gpiochip0 -o 4 -r -f\n"
+ "gpio-event-mon -n gpiochip0 -o 4 -r -f -b 10000\n"
);
}
+#define EDGE_FLAGS \
+ (GPIO_V2_LINE_FLAG_EDGE_RISING | \
+ GPIO_V2_LINE_FLAG_EDGE_FALLING)
+
int main(int argc, char **argv)
{
const char *device_name = NULL;
- unsigned int line = -1;
+ unsigned int lines[GPIO_V2_LINES_MAX];
+ unsigned int num_lines = 0;
unsigned int loops = 0;
- uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT;
- uint32_t eventflags = 0;
- int c;
+ struct gpio_v2_line_config config;
+ int c, attr, i;
+ unsigned long debounce_period_us = 0;
- while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) {
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_INPUT;
+ while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) {
switch (c) {
case 'c':
loops = strtoul(optarg, NULL, 10);
@@ -158,19 +182,27 @@ int main(int argc, char **argv)
device_name = optarg;
break;
case 'o':
- line = strtoul(optarg, NULL, 10);
+ if (num_lines >= GPIO_V2_LINES_MAX) {
+ print_usage();
+ return -1;
+ }
+ lines[num_lines] = strtoul(optarg, NULL, 10);
+ num_lines++;
+ break;
+ case 'b':
+ debounce_period_us = strtoul(optarg, NULL, 10);
break;
case 'd':
- handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN;
+ config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN;
break;
case 's':
- handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE;
+ config.flags |= GPIO_V2_LINE_FLAG_OPEN_SOURCE;
break;
case 'r':
- eventflags |= GPIOEVENT_REQUEST_RISING_EDGE;
+ config.flags |= GPIO_V2_LINE_FLAG_EDGE_RISING;
break;
case 'f':
- eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE;
+ config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING;
break;
case '?':
print_usage();
@@ -178,15 +210,23 @@ int main(int argc, char **argv)
}
}
- if (!device_name || line == -1) {
+ if (debounce_period_us) {
+ attr = config.num_attrs;
+ config.num_attrs++;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&config.attrs[attr].mask, i);
+ config.attrs[attr].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE;
+ config.attrs[attr].attr.debounce_period_us = debounce_period_us;
+ }
+
+ if (!device_name || num_lines == 0) {
print_usage();
return -1;
}
- if (!eventflags) {
+ if (!(config.flags & EDGE_FLAGS)) {
printf("No flags specified, listening on both rising and "
"falling edges\n");
- eventflags = GPIOEVENT_REQUEST_BOTH_EDGES;
+ config.flags |= EDGE_FLAGS;
}
- return monitor_device(device_name, line, handleflags,
- eventflags, loops);
+ return monitor_device(device_name, lines, num_lines, &config, loops);
}
diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c
index 9fd926e8cb52..54fdf59dd320 100644
--- a/tools/gpio/gpio-hammer.c
+++ b/tools/gpio/gpio-hammer.c
@@ -22,39 +22,46 @@
#include <linux/gpio.h>
#include "gpio-utils.h"
-int hammer_device(const char *device_name, unsigned int *lines, int nlines,
+int hammer_device(const char *device_name, unsigned int *lines, int num_lines,
unsigned int loops)
{
- struct gpiohandle_data data;
+ struct gpio_v2_line_values values;
+ struct gpio_v2_line_config config;
char swirr[] = "-\\|/";
int fd;
int ret;
int i, j;
unsigned int iteration = 0;
- memset(&data.values, 0, sizeof(data.values));
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_OUTPUT, &data,
- "gpio-hammer");
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, "gpio-hammer");
if (ret < 0)
goto exit_error;
else
fd = ret;
- ret = gpiotools_get_values(fd, &data);
+ values.mask = 0;
+ values.bits = 0;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&values.mask, i);
+
+ ret = gpiotools_get_values(fd, &values);
if (ret < 0)
goto exit_close_error;
fprintf(stdout, "Hammer lines [");
- for (i = 0; i < nlines; i++) {
+ for (i = 0; i < num_lines; i++) {
fprintf(stdout, "%d", lines[i]);
- if (i != (nlines - 1))
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "] on %s, initial states: [", device_name);
- for (i = 0; i < nlines; i++) {
- fprintf(stdout, "%d", data.values[i]);
- if (i != (nlines - 1))
+ for (i = 0; i < num_lines; i++) {
+ fprintf(stdout, "%d", gpiotools_test_bit(values.bits, i));
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "]\n");
@@ -63,15 +70,15 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
j = 0;
while (1) {
/* Invert all lines so we blink */
- for (i = 0; i < nlines; i++)
- data.values[i] = !data.values[i];
+ for (i = 0; i < num_lines; i++)
+ gpiotools_change_bit(&values.bits, i);
- ret = gpiotools_set_values(fd, &data);
+ ret = gpiotools_set_values(fd, &values);
if (ret < 0)
goto exit_close_error;
/* Re-read values to get status */
- ret = gpiotools_get_values(fd, &data);
+ ret = gpiotools_get_values(fd, &values);
if (ret < 0)
goto exit_close_error;
@@ -81,9 +88,10 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
j = 0;
fprintf(stdout, "[");
- for (i = 0; i < nlines; i++) {
- fprintf(stdout, "%d: %d", lines[i], data.values[i]);
- if (i != (nlines - 1))
+ for (i = 0; i < num_lines; i++) {
+ fprintf(stdout, "%d: %d", lines[i],
+ gpiotools_test_bit(values.bits, i));
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "]\r");
@@ -97,7 +105,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
ret = 0;
exit_close_error:
- gpiotools_release_linehandle(fd);
+ gpiotools_release_line(fd);
exit_error:
return ret;
}
@@ -121,7 +129,7 @@ int main(int argc, char **argv)
const char *device_name = NULL;
unsigned int lines[GPIOHANDLES_MAX];
unsigned int loops = 0;
- int nlines;
+ int num_lines;
int c;
int i;
@@ -158,11 +166,11 @@ int main(int argc, char **argv)
return -1;
}
- nlines = i;
+ num_lines = i;
- if (!device_name || !nlines) {
+ if (!device_name || !num_lines) {
print_usage();
return -1;
}
- return hammer_device(device_name, lines, nlines, loops);
+ return hammer_device(device_name, lines, num_lines, loops);
}
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
index 16a5d9cb9da2..37187e056c8b 100644
--- a/tools/gpio/gpio-utils.c
+++ b/tools/gpio/gpio-utils.c
@@ -38,7 +38,7 @@
* such as "gpiochip0"
* @lines: An array desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
+ * @num_lines: The number of lines to request.
* @flag: The new flag for requsted gpio. Reference
* "linux/gpio.h" for the meaning of flag.
* @data: Default value will be set to gpio when flag is
@@ -56,7 +56,7 @@
* On failure return the errno.
*/
int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int nlines, unsigned int flag,
+ unsigned int num_lines, unsigned int flag,
struct gpiohandle_data *data,
const char *consumer_label)
{
@@ -78,12 +78,12 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
goto exit_free_name;
}
- for (i = 0; i < nlines; i++)
+ for (i = 0; i < num_lines; i++)
req.lineoffsets[i] = lines[i];
req.flags = flag;
strcpy(req.consumer_label, consumer_label);
- req.lines = nlines;
+ req.lines = num_lines;
if (flag & GPIOHANDLE_REQUEST_OUTPUT)
memcpy(req.default_values, data, sizeof(req.default_values));
@@ -100,20 +100,87 @@ exit_free_name:
free(chrdev_name);
return ret < 0 ? ret : req.fd;
}
+
+/**
+ * gpiotools_request_line() - request gpio lines in a gpiochip
+ * @device_name: The name of gpiochip without prefix "/dev/",
+ * such as "gpiochip0"
+ * @lines: An array desired lines, specified by offset
+ * index for the associated GPIO device.
+ * @num_lines: The number of lines to request.
+ * @config: The new config for requested gpio. Reference
+ * "linux/gpio.h" for config details.
+ * @consumer: The name of consumer, such as "sysfs",
+ * "powerkey". This is useful for other users to
+ * know who is using.
+ *
+ * Request gpio lines through the ioctl provided by chardev. User
+ * could call gpiotools_set_values() and gpiotools_get_values() to
+ * read and write respectively through the returned fd. Call
+ * gpiotools_release_line() to release these lines after that.
+ *
+ * Return: On success return the fd;
+ * On failure return the errno.
+ */
+int gpiotools_request_line(const char *device_name, unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
+ const char *consumer)
+{
+ struct gpio_v2_line_request req;
+ char *chrdev_name;
+ int fd;
+ int i;
+ int ret;
+
+ ret = asprintf(&chrdev_name, "/dev/%s", device_name);
+ if (ret < 0)
+ return -ENOMEM;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s, %s\n",
+ chrdev_name, strerror(errno));
+ goto exit_free_name;
+ }
+
+ memset(&req, 0, sizeof(req));
+ for (i = 0; i < num_lines; i++)
+ req.offsets[i] = lines[i];
+
+ req.config = *config;
+ strcpy(req.consumer, consumer);
+ req.num_lines = num_lines;
+
+ ret = ioctl(fd, GPIO_V2_GET_LINE_IOCTL, &req);
+ if (ret == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to issue %s (%d), %s\n",
+ "GPIO_GET_LINE_IOCTL", ret, strerror(errno));
+ }
+
+ if (close(fd) == -1)
+ perror("Failed to close GPIO character device file");
+exit_free_name:
+ free(chrdev_name);
+ return ret < 0 ? ret : req.fd;
+}
+
/**
* gpiotools_set_values(): Set the value of gpio(s)
* @fd: The fd returned by
- * gpiotools_request_linehandle().
- * @data: The array of values want to set.
+ * gpiotools_request_line().
+ * @values: The array of values want to set.
*
* Return: On success return 0;
* On failure return the errno.
*/
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values)
{
int ret;
- ret = ioctl(fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, data);
+ ret = ioctl(fd, GPIO_V2_LINE_SET_VALUES_IOCTL, values);
if (ret == -1) {
ret = -errno;
fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -127,17 +194,17 @@ int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
/**
* gpiotools_get_values(): Get the value of gpio(s)
* @fd: The fd returned by
- * gpiotools_request_linehandle().
- * @data: The array of values get from hardware.
+ * gpiotools_request_line().
+ * @values: The array of values get from hardware.
*
* Return: On success return 0;
* On failure return the errno.
*/
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values)
{
int ret;
- ret = ioctl(fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, data);
+ ret = ioctl(fd, GPIO_V2_LINE_GET_VALUES_IOCTL, values);
if (ret == -1) {
ret = -errno;
fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -170,6 +237,27 @@ int gpiotools_release_linehandle(const int fd)
}
/**
+ * gpiotools_release_line(): Release the line(s) of gpiochip
+ * @fd: The fd returned by
+ * gpiotools_request_line().
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_release_line(const int fd)
+{
+ int ret;
+
+ ret = close(fd);
+ if (ret == -1) {
+ perror("Failed to close GPIO LINE device file");
+ ret = -errno;
+ }
+
+ return ret;
+}
+
+/**
* gpiotools_get(): Get value from specific line
* @device_name: The name of gpiochip without prefix "/dev/",
* such as "gpiochip0"
@@ -180,11 +268,14 @@ int gpiotools_release_linehandle(const int fd)
*/
int gpiotools_get(const char *device_name, unsigned int line)
{
- struct gpiohandle_data data;
+ int ret;
+ unsigned int value;
unsigned int lines[] = {line};
- gpiotools_gets(device_name, lines, 1, &data);
- return data.values[0];
+ ret = gpiotools_gets(device_name, lines, 1, &value);
+ if (ret)
+ return ret;
+ return value;
}
@@ -194,28 +285,36 @@ int gpiotools_get(const char *device_name, unsigned int line)
* such as "gpiochip0".
* @lines: An array desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
- * @data: The array of values get from gpiochip.
+ * @num_lines: The number of lines to request.
+ * @values: The array of values get from gpiochip.
*
* Return: On success return 0;
* On failure return the errno.
*/
int gpiotools_gets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data)
+ unsigned int num_lines, unsigned int *values)
{
- int fd;
+ int fd, i;
int ret;
int ret_close;
+ struct gpio_v2_line_config config;
+ struct gpio_v2_line_values lv;
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_INPUT, data,
- CONSUMER);
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_INPUT;
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, CONSUMER);
if (ret < 0)
return ret;
fd = ret;
- ret = gpiotools_get_values(fd, data);
- ret_close = gpiotools_release_linehandle(fd);
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&lv.mask, i);
+ ret = gpiotools_get_values(fd, &lv);
+ if (!ret)
+ for (i = 0; i < num_lines; i++)
+ values[i] = gpiotools_test_bit(lv.bits, i);
+ ret_close = gpiotools_release_line(fd);
return ret < 0 ? ret : ret_close;
}
@@ -232,11 +331,9 @@ int gpiotools_gets(const char *device_name, unsigned int *lines,
int gpiotools_set(const char *device_name, unsigned int line,
unsigned int value)
{
- struct gpiohandle_data data;
unsigned int lines[] = {line};
- data.values[0] = value;
- return gpiotools_sets(device_name, lines, 1, &data);
+ return gpiotools_sets(device_name, lines, 1, &value);
}
/**
@@ -245,23 +342,32 @@ int gpiotools_set(const char *device_name, unsigned int line,
* such as "gpiochip0".
* @lines: An array desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
- * @data: The array of values set to gpiochip, must be
+ * @num_lines: The number of lines to request.
+ * @value: The array of values set to gpiochip, must be
* 0(low) or 1(high).
*
* Return: On success return 0;
* On failure return the errno.
*/
int gpiotools_sets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data)
+ unsigned int num_lines, unsigned int *values)
{
- int ret;
+ int ret, i;
+ struct gpio_v2_line_config config;
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_OUTPUT, data,
- CONSUMER);
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+ config.num_attrs = 1;
+ config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES;
+ for (i = 0; i < num_lines; i++) {
+ gpiotools_set_bit(&config.attrs[0].mask, i);
+ gpiotools_assign_bit(&config.attrs[0].attr.values,
+ i, values[i]);
+ }
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, CONSUMER);
if (ret < 0)
return ret;
- return gpiotools_release_linehandle(ret);
+ return gpiotools_release_line(ret);
}
diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h
index cf37f13f3dcb..6c69a9f1c253 100644
--- a/tools/gpio/gpio-utils.h
+++ b/tools/gpio/gpio-utils.h
@@ -12,7 +12,9 @@
#ifndef _GPIO_UTILS_H_
#define _GPIO_UTILS_H_
+#include <stdbool.h>
#include <string.h>
+#include <linux/types.h>
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -23,19 +25,55 @@ static inline int check_prefix(const char *str, const char *prefix)
}
int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int nlines, unsigned int flag,
+ unsigned int num_lines, unsigned int flag,
struct gpiohandle_data *data,
const char *consumer_label);
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data);
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data);
int gpiotools_release_linehandle(const int fd);
+int gpiotools_request_line(const char *device_name,
+ unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
+ const char *consumer);
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_release_line(const int fd);
+
int gpiotools_get(const char *device_name, unsigned int line);
int gpiotools_gets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data);
+ unsigned int num_lines, unsigned int *values);
int gpiotools_set(const char *device_name, unsigned int line,
unsigned int value);
int gpiotools_sets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data);
+ unsigned int num_lines, unsigned int *values);
+
+/* helper functions for gpio_v2_line_values bits */
+static inline void gpiotools_set_bit(__u64 *b, int n)
+{
+ *b |= _BITULL(n);
+}
+
+static inline void gpiotools_change_bit(__u64 *b, int n)
+{
+ *b ^= _BITULL(n);
+}
+
+static inline void gpiotools_clear_bit(__u64 *b, int n)
+{
+ *b &= ~_BITULL(n);
+}
+
+static inline int gpiotools_test_bit(__u64 b, int n)
+{
+ return !!(b & _BITULL(n));
+}
+
+static inline void gpiotools_assign_bit(__u64 *b, int n, bool value)
+{
+ if (value)
+ gpiotools_set_bit(b, n);
+ else
+ gpiotools_clear_bit(b, n);
+}
#endif /* _GPIO_UTILS_H_ */
diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c
index 5cea24fddfa7..f229ec62301b 100644
--- a/tools/gpio/gpio-watch.c
+++ b/tools/gpio/gpio-watch.c
@@ -21,8 +21,8 @@
int main(int argc, char **argv)
{
- struct gpioline_info_changed chg;
- struct gpioline_info req;
+ struct gpio_v2_line_info_changed chg;
+ struct gpio_v2_line_info req;
struct pollfd pfd;
int fd, i, j, ret;
char *event, *end;
@@ -40,11 +40,11 @@ int main(int argc, char **argv)
for (i = 0, j = 2; i < argc - 2; i++, j++) {
memset(&req, 0, sizeof(req));
- req.line_offset = strtoul(argv[j], &end, 0);
+ req.offset = strtoul(argv[j], &end, 0);
if (*end != '\0')
goto err_usage;
- ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req);
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_WATCH_IOCTL, &req);
if (ret) {
perror("unable to set up line watch");
return EXIT_FAILURE;
@@ -71,13 +71,13 @@ int main(int argc, char **argv)
}
switch (chg.event_type) {
- case GPIOLINE_CHANGED_REQUESTED:
+ case GPIO_V2_LINE_CHANGED_REQUESTED:
event = "requested";
break;
- case GPIOLINE_CHANGED_RELEASED:
+ case GPIO_V2_LINE_CHANGED_RELEASED:
event = "released";
break;
- case GPIOLINE_CHANGED_CONFIG:
+ case GPIO_V2_LINE_CHANGED_CONFIG:
event = "config changed";
break;
default:
@@ -87,7 +87,7 @@ int main(int argc, char **argv)
}
printf("line %u: %s at %llu\n",
- chg.info.line_offset, event, chg.timestamp);
+ chg.info.offset, event, chg.timestamp_ns);
}
}
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
index b08d7a5e779b..5a05a454d0c9 100644
--- a/tools/gpio/lsgpio.c
+++ b/tools/gpio/lsgpio.c
@@ -25,57 +25,73 @@
struct gpio_flag {
char *name;
- unsigned long mask;
+ unsigned long long mask;
};
struct gpio_flag flagnames[] = {
{
- .name = "kernel",
- .mask = GPIOLINE_FLAG_KERNEL,
+ .name = "used",
+ .mask = GPIO_V2_LINE_FLAG_USED,
+ },
+ {
+ .name = "input",
+ .mask = GPIO_V2_LINE_FLAG_INPUT,
},
{
.name = "output",
- .mask = GPIOLINE_FLAG_IS_OUT,
+ .mask = GPIO_V2_LINE_FLAG_OUTPUT,
},
{
.name = "active-low",
- .mask = GPIOLINE_FLAG_ACTIVE_LOW,
+ .mask = GPIO_V2_LINE_FLAG_ACTIVE_LOW,
},
{
.name = "open-drain",
- .mask = GPIOLINE_FLAG_OPEN_DRAIN,
+ .mask = GPIO_V2_LINE_FLAG_OPEN_DRAIN,
},
{
.name = "open-source",
- .mask = GPIOLINE_FLAG_OPEN_SOURCE,
+ .mask = GPIO_V2_LINE_FLAG_OPEN_SOURCE,
},
{
.name = "pull-up",
- .mask = GPIOLINE_FLAG_BIAS_PULL_UP,
+ .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_UP,
},
{
.name = "pull-down",
- .mask = GPIOLINE_FLAG_BIAS_PULL_DOWN,
+ .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN,
},
{
.name = "bias-disabled",
- .mask = GPIOLINE_FLAG_BIAS_DISABLE,
+ .mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED,
},
};
-void print_flags(unsigned long flags)
+static void print_attributes(struct gpio_v2_line_info *info)
{
int i;
- int printed = 0;
+ const char *field_format = "%s";
for (i = 0; i < ARRAY_SIZE(flagnames); i++) {
- if (flags & flagnames[i].mask) {
- if (printed)
- fprintf(stdout, " ");
- fprintf(stdout, "%s", flagnames[i].name);
- printed++;
+ if (info->flags & flagnames[i].mask) {
+ fprintf(stdout, field_format, flagnames[i].name);
+ field_format = ", %s";
}
}
+
+ if ((info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) &&
+ (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING))
+ fprintf(stdout, field_format, "both-edges");
+ else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING)
+ fprintf(stdout, field_format, "rising-edge");
+ else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING)
+ fprintf(stdout, field_format, "falling-edge");
+
+ for (i = 0; i < info->num_attrs; i++) {
+ if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE)
+ fprintf(stdout, ", debounce_period=%dusec",
+ info->attrs[0].debounce_period_us);
+ }
}
int list_device(const char *device_name)
@@ -109,18 +125,18 @@ int list_device(const char *device_name)
/* Loop over the lines and print info */
for (i = 0; i < cinfo.lines; i++) {
- struct gpioline_info linfo;
+ struct gpio_v2_line_info linfo;
memset(&linfo, 0, sizeof(linfo));
- linfo.line_offset = i;
+ linfo.offset = i;
- ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo);
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &linfo);
if (ret == -1) {
ret = -errno;
perror("Failed to issue LINEINFO IOCTL\n");
goto exit_close_error;
}
- fprintf(stdout, "\tline %2d:", linfo.line_offset);
+ fprintf(stdout, "\tline %2d:", linfo.offset);
if (linfo.name[0])
fprintf(stdout, " \"%s\"", linfo.name);
else
@@ -131,7 +147,7 @@ int list_device(const char *device_name)
fprintf(stdout, " unused");
if (linfo.flags) {
fprintf(stdout, " [");
- print_flags(linfo.flags);
+ print_attributes(&linfo);
fprintf(stdout, "]");
}
fprintf(stdout, "\n");
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
new file mode 100644
index 000000000000..89135bb35bf7
--- /dev/null
+++ b/tools/include/linux/static_call_types.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _STATIC_CALL_TYPES_H
+#define _STATIC_CALL_TYPES_H
+
+#include <linux/types.h>
+#include <linux/stringify.h>
+
+#define STATIC_CALL_KEY_PREFIX __SCK__
+#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX)
+#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1)
+#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name)
+
+#define STATIC_CALL_TRAMP_PREFIX __SCT__
+#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX)
+#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1)
+#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
+#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))
+
+/*
+ * Flags in the low bits of static_call_site::key.
+ */
+#define STATIC_CALL_SITE_TAIL 1UL /* tail call */
+#define STATIC_CALL_SITE_INIT 2UL /* init section */
+#define STATIC_CALL_SITE_FLAGS 3UL
+
+/*
+ * The static call site table needs to be created by external tooling (objtool
+ * or a compiler plugin).
+ */
+struct static_call_site {
+ s32 addr;
+ s32 key;
+};
+
+#endif /* _STATIC_CALL_TYPES_H */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d..f2b5d72a46c2 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat)
#define __NR_umount2 39
__SYSCALL(__NR_umount2, sys_umount)
#define __NR_mount 40
-__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
+__SYSCALL(__NR_mount, sys_mount)
#define __NR_pivot_root 41
__SYSCALL(__NR_pivot_root, sys_pivot_root)
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
#define __NR_write 64
__SYSCALL(__NR_write, sys_write)
#define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
#define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
#define __NR_pread64 67
__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
#define __NR_pwrite64 68
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
/* fs/splice.c */
#define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_splice 76
__SYSCALL(__NR_splice, sys_splice)
#define __NR_tee 77
@@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
#define __NR_sendmmsg 269
__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
#define __NR_process_vm_readv 270
-__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
- compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
#define __NR_process_vm_writev 271
-__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
- compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
#define __NR_kcmp 272
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 273
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 33ba98d72b16..99d00870b160 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -3,9 +3,9 @@
C Self R W RMW Self R W DR DW RMW SV
-- ---- - - --- ---- - - -- -- --- --
-Store, e.g., WRITE_ONCE() Y Y
-Load, e.g., READ_ONCE() Y Y Y Y
-Unsuccessful RMW operation Y Y Y Y
+Relaxed store Y Y
+Relaxed load Y Y Y Y
+Relaxed RMW operation Y Y Y Y
rcu_dereference() Y Y Y Y
Successful *_acquire() R Y Y Y Y Y Y
Successful *_release() C Y Y Y W Y
@@ -17,14 +17,19 @@ smp_mb__before_atomic() CP Y Y Y a a a a Y
smp_mb__after_atomic() CP a a Y Y Y Y Y Y
-Key: C: Ordering is cumulative
- P: Ordering propagates
- R: Read, for example, READ_ONCE(), or read portion of RMW
- W: Write, for example, WRITE_ONCE(), or write portion of RMW
- Y: Provides ordering
- a: Provides ordering given intervening RMW atomic operation
- DR: Dependent read (address dependency)
- DW: Dependent write (address, data, or control dependency)
- RMW: Atomic read-modify-write operation
- SELF: Orders self, as opposed to accesses before and/or after
- SV: Orders later accesses to the same variable
+Key: Relaxed: A relaxed operation is either READ_ONCE(), WRITE_ONCE(),
+ a *_relaxed() RMW operation, an unsuccessful RMW
+ operation, a non-value-returning RMW operation such
+ as atomic_inc(), or one of the atomic*_read() and
+ atomic*_set() family of operations.
+ C: Ordering is cumulative
+ P: Ordering propagates
+ R: Read, for example, READ_ONCE(), or read portion of RMW
+ W: Write, for example, WRITE_ONCE(), or write portion of RMW
+ Y: Provides ordering
+ a: Provides ordering given intervening RMW atomic operation
+ DR: Dependent read (address dependency)
+ DW: Dependent write (address, data, or control dependency)
+ RMW: Atomic read-modify-write operation
+ SELF: Orders self, as opposed to accesses before and/or after
+ SV: Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt
new file mode 100644
index 000000000000..2f840dcd15cf
--- /dev/null
+++ b/tools/memory-model/Documentation/litmus-tests.txt
@@ -0,0 +1,1074 @@
+Linux-Kernel Memory Model Litmus Tests
+======================================
+
+This file describes the LKMM litmus-test format by example, describes
+some tricks and traps, and finally outlines LKMM's limitations. Earlier
+versions of this material appeared in a number of LWN articles, including:
+
+https://lwn.net/Articles/720550/
+ A formal kernel memory-ordering model (part 2)
+https://lwn.net/Articles/608550/
+ Axiomatic validation of memory barriers and atomic instructions
+https://lwn.net/Articles/470681/
+ Validating Memory Barriers and Atomic Instructions
+
+This document presents information in decreasing order of applicability,
+so that, where possible, the information that has proven more commonly
+useful is shown near the beginning.
+
+For information on installing LKMM, including the underlying "herd7"
+tool, please see tools/memory-model/README.
+
+
+Copy-Pasta
+==========
+
+As with other software, it is often better (if less macho) to adapt an
+existing litmus test than it is to create one from scratch. A number
+of litmus tests may be found in the kernel source tree:
+
+ tools/memory-model/litmus-tests/
+ Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available on github
+and kernel.org:
+
+ https://github.com/paulmckrcu/litmus
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+
+The -l and -L arguments to "git grep" can be quite helpful in identifying
+existing litmus tests that are similar to the one you need. But even if
+you start with an existing litmus test, it is still helpful to have a
+good understanding of the litmus-test format.
+
+
+Examples and Format
+===================
+
+This section describes the overall format of litmus tests, starting
+with a small example of the message-passing pattern and moving on to
+more complex examples that illustrate explicit initialization and LKMM's
+minimalistic set of flow-control statements.
+
+
+Message-Passing Example
+-----------------------
+
+This section gives an overview of the format of a litmus test using an
+example based on the common message-passing use case. This use case
+appears often in the Linux kernel. For example, a flag (modeled by "y"
+below) indicates that a buffer (modeled by "x" below) is now completely
+filled in and ready for use. It would be very bad if the consumer saw the
+flag set, but, due to memory misordering, saw old values in the buffer.
+
+This example asks whether smp_store_release() and smp_load_acquire()
+suffices to avoid this bad outcome:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 WRITE_ONCE(*x, 1);
+ 8 smp_store_release(y, 1);
+ 9 }
+10
+11 P1(int *x, int *y)
+12 {
+13 int r0;
+14 int r1;
+15
+16 r0 = smp_load_acquire(y);
+17 r1 = READ_ONCE(*x);
+18 }
+19
+20 exists (1:r0=1 /\ 1:r1=0)
+
+Line 1 starts with "C", which identifies this file as being in the
+LKMM C-language format (which, as we will see, is a small fragment
+of the full C language). The remainder of line 1 is the name of
+the test, which by convention is the filename with the ".litmus"
+suffix stripped. In this case, the actual test may be found in
+tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+in the Linux-kernel source tree.
+
+Mechanically generated litmus tests will often have an optional
+double-quoted comment string on the second line. Such strings are ignored
+when running the test. Yes, you can add your own comments to litmus
+tests, but this is a bit involved due to the use of multiple parsers.
+For now, you can use C-language comments in the C code, and these comments
+may be in either the "/* */" or the "//" style. A later section will
+cover the full litmus-test commenting story.
+
+Line 3 is the initialization section. Because the default initialization
+to zero suffices for this test, the "{}" syntax is used, which mean the
+initialization section is empty. Litmus tests requiring non-default
+initialization must have non-empty initialization sections, as in the
+example that will be presented later in this document.
+
+Lines 5-9 show the first process and lines 11-18 the second process. Each
+process corresponds to a Linux-kernel task (or kthread, workqueue, thread,
+and so on; LKMM discussions often use these terms interchangeably).
+The name of the first process is "P0" and that of the second "P1".
+You can name your processes anything you like as long as the names consist
+of a single "P" followed by a number, and as long as the numbers are
+consecutive starting with zero. This can actually be quite helpful,
+for example, a .litmus file matching "^P1(" but not matching "^P2("
+must contain a two-process litmus test.
+
+The argument list for each function are pointers to the global variables
+used by that function. Unlike normal C-language function parameters, the
+names are significant. The fact that both P0() and P1() have a formal
+parameter named "x" means that these two processes are working with the
+same global variable, also named "x". So the "int *x, int *y" on P0()
+and P1() mean that both processes are working with two shared global
+variables, "x" and "y". Global variables are always passed to processes
+by reference, hence "P0(int *x, int *y)", but *never* "P0(int x, int y)".
+
+P0() has no local variables, but P1() has two of them named "r0" and "r1".
+These names may be freely chosen, but for historical reasons stemming from
+other litmus-test formats, it is conventional to use names consisting of
+"r" followed by a number as shown here. A common bug in litmus tests
+is forgetting to add a global variable to a process's parameter list.
+This will sometimes result in an error message, but can also cause the
+intended global to instead be silently treated as an undeclared local
+variable.
+
+Each process's code is similar to Linux-kernel C, as can be seen on lines
+7-8 and 13-17. This code may use many of the Linux kernel's atomic
+operations, some of its exclusive-lock functions, and some of its RCU
+and SRCU functions. An approximate list of the currently supported
+functions may be found in the linux-kernel.def file.
+
+The P0() process does "WRITE_ONCE(*x, 1)" on line 7. Because "x" is a
+pointer in P0()'s parameter list, this does an unordered store to global
+variable "x". Line 8 does "smp_store_release(y, 1)", and because "y"
+is also in P0()'s parameter list, this does a release store to global
+variable "y".
+
+The P1() process declares two local variables on lines 13 and 14.
+Line 16 does "r0 = smp_load_acquire(y)" which does an acquire load
+from global variable "y" into local variable "r0". Line 17 does a
+"r1 = READ_ONCE(*x)", which does an unordered load from "*x" into local
+variable "r1". Both "x" and "y" are in P1()'s parameter list, so both
+reference the same global variables that are used by P0().
+
+Line 20 is the "exists" assertion expression to evaluate the final state.
+This final state is evaluated after the dust has settled: both processes
+have completed and all of their memory references and memory barriers
+have propagated to all parts of the system. The references to the local
+variables "r0" and "r1" in line 24 must be prefixed with "1:" to specify
+which process they are local to.
+
+Note that the assertion expression is written in the litmus-test
+language rather than in C. For example, single "=" is an equality
+operator rather than an assignment. The "/\" character combination means
+"and". Similarly, "\/" stands for "or". Both of these are ASCII-art
+representations of the corresponding mathematical symbols. Finally,
+"~" stands for "logical not", which is "!" in C, and not to be confused
+with the C-language "~" operator which instead stands for "bitwise not".
+Parentheses may be used to override precedence.
+
+The "exists" assertion on line 20 is satisfied if the consumer sees the
+flag ("y") set but the buffer ("x") as not yet filled in, that is, if P1()
+loaded a value from "x" that was equal to 1 but loaded a value from "y"
+that was still equal to zero.
+
+This example can be checked by running the following command, which
+absolutely must be run from the tools/memory-model directory and from
+this directory only:
+
+herd7 -conf linux-kernel.cfg litmus-tests/MP+pooncerelease+poacquireonce.litmus
+
+The output is the result of something similar to a full state-space
+search, and is as follows:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=0)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.00
+12 Hash=579aaa14d8c35a39429b02e698241d09
+
+The most pertinent line is line 10, which contains "Never 0 3", which
+indicates that the bad result flagged by the "exists" clause never
+happens. This line might instead say "Sometimes" to indicate that the
+bad result happened in some but not all executions, or it might say
+"Always" to indicate that the bad result happened in all executions.
+(The herd7 tool doesn't judge, so it is only an LKMM convention that the
+"exists" clause indicates a bad result. To see this, invert the "exists"
+clause's condition and run the test.) The numbers ("0 3") at the end
+of this line indicate the number of end states satisfying the "exists"
+clause (0) and the number not not satisfying that clause (3).
+
+Another important part of this output is shown in lines 2-5, repeated here:
+
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+
+Line 2 gives the total number of end states, and each of lines 3-5 list
+one of these states, with the first ("1:r0=0; 1:r1=0;") indicating that
+both of P1()'s loads returned the value "0". As expected, given the
+"Never" on line 10, the state flagged by the "exists" clause is not
+listed. This full list of states can be helpful when debugging a new
+litmus test.
+
+The rest of the output is not normally needed, either due to irrelevance
+or due to being redundant with the lines discussed above. However, the
+following paragraph lists them for the benefit of readers possessed of
+an insatiable curiosity. Other readers should feel free to skip ahead.
+
+Line 1 echos the test name, along with the "Test" and "Allowed". Line 6's
+"No" says that the "exists" clause was not satisfied by any execution,
+and as such it has the same meaning as line 10's "Never". Line 7 is a
+lead-in to line 8's "Positive: 0 Negative: 3", which lists the number
+of end states satisfying and not satisfying the "exists" clause, just
+like the two numbers at the end of line 10. Line 9 repeats the "exists"
+clause so that you don't have to look it up in the litmus-test file.
+The number at the end of line 11 (which begins with "Time") gives the
+time in seconds required to analyze the litmus test. Small tests such
+as this one complete in a few milliseconds, so "0.00" is quite common.
+Line 12 gives a hash of the contents for the litmus-test file, and is used
+by tooling that manages litmus tests and their output. This tooling is
+used by people modifying LKMM itself, and among other things lets such
+people know which of the several thousand relevant litmus tests were
+affected by a given change to LKMM.
+
+
+Initialization
+--------------
+
+The previous example relied on the default zero initialization for
+"x" and "y", but a similar litmus test could instead initialize them
+to some other value:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {
+ 4 x=42;
+ 5 y=42;
+ 6 }
+ 7
+ 8 P0(int *x, int *y)
+ 9 {
+10 WRITE_ONCE(*x, 1);
+11 smp_store_release(y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16 int r0;
+17 int r1;
+18
+19 r0 = smp_load_acquire(y);
+20 r1 = READ_ONCE(*x);
+21 }
+22
+23 exists (1:r0=1 /\ 1:r1=42)
+
+Lines 3-6 now initialize both "x" and "y" to the value 42. This also
+means that the "exists" clause on line 23 must change "1:r1=0" to
+"1:r1=42".
+
+Running the test gives the same overall result as before, but with the
+value 42 appearing in place of the value zero:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=1; 1:r1=1;
+ 4 1:r0=42; 1:r1=1;
+ 5 1:r0=42; 1:r1=42;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=42)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.02
+12 Hash=ab9a9b7940a75a792266be279a980156
+
+It is tempting to avoid the open-coded repetitions of the value "42"
+by defining another global variable "initval=42" and replacing all
+occurrences of "42" with "initval". This will not, repeat *not*,
+initialize "x" and "y" to 42, but instead to the address of "initval"
+(try it!). See the section below on linked lists to learn more about
+why this approach to initialization can be useful.
+
+
+Control Structures
+------------------
+
+LKMM supports the C-language "if" statement, which allows modeling of
+conditional branches. In LKMM, conditional branches can affect ordering,
+but only if you are *very* careful (compilers are surprisingly able
+to optimize away conditional branches). The following example shows
+the "load buffering" (LB) use case that is used in the Linux kernel to
+synchronize between ring-buffer producers and consumers. In the example
+below, P0() is one side checking to see if an operation may proceed and
+P1() is the other side completing its update.
+
+ 1 C LB+fencembonceonce+ctrlonceonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r0;
+ 8
+ 9 r0 = READ_ONCE(*x);
+10 if (r0)
+11 WRITE_ONCE(*y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16 int r0;
+17
+18 r0 = READ_ONCE(*y);
+19 smp_mb();
+20 WRITE_ONCE(*x, 1);
+21 }
+22
+23 exists (0:r0=1 /\ 1:r0=1)
+
+P1()'s "if" statement on line 10 works as expected, so that line 11 is
+executed only if line 9 loads a non-zero value from "x". Because P1()'s
+write of "1" to "x" happens only after P1()'s read from "y", one would
+hope that the "exists" clause cannot be satisfied. LKMM agrees:
+
+ 1 Test LB+fencembonceonce+ctrlonceonce Allowed
+ 2 States 2
+ 3 0:r0=0; 1:r0=0;
+ 4 0:r0=1; 1:r0=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r0=1 /\ 1:r0=1)
+ 9 Observation LB+fencembonceonce+ctrlonceonce Never 0 2
+10 Time LB+fencembonceonce+ctrlonceonce 0.00
+11 Hash=e5260556f6de495fd39b556d1b831c3b
+
+However, there is no "while" statement due to the fact that full
+state-space search has some difficulty with iteration. However, there
+are tricks that may be used to handle some special cases, which are
+discussed below. In addition, loop-unrolling tricks may be applied,
+albeit sparingly.
+
+
+Tricks and Traps
+================
+
+This section covers extracting debug output from herd7, emulating
+spin loops, handling trivial linked lists, adding comments to litmus tests,
+emulating call_rcu(), and finally tricks to improve herd7 performance
+in order to better handle large litmus tests.
+
+
+Debug Output
+------------
+
+By default, the herd7 state output includes all variables mentioned
+in the "exists" clause. But sometimes debugging efforts are greatly
+aided by the values of other variables. Consider this litmus test
+(tools/memory-order/litmus-tests/SB+rfionceonce-poonceonces.litmus but
+slightly modified), which probes an obscure corner of hardware memory
+ordering:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r1;
+ 8 int r2;
+ 9
+10 WRITE_ONCE(*x, 1);
+11 r1 = READ_ONCE(*x);
+12 r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17 int r3;
+18 int r4;
+19
+20 WRITE_ONCE(*y, 1);
+21 r3 = READ_ONCE(*y);
+22 r4 = READ_ONCE(*x);
+23 }
+24
+25 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output is as follows:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r2=0; 1:r4=0;
+ 4 0:r2=0; 1:r4=1;
+ 5 0:r2=1; 1:r4=0;
+ 6 0:r2=1; 1:r4=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=c7f30fe0faebb7d565405d55b7318ada
+
+(This output indicates that CPUs are permitted to "snoop their own
+store buffers", which all of Linux's CPU families other than s390 will
+happily do. Such snooping results in disagreement among CPUs on the
+order of stores from different CPUs, which is rarely an issue.)
+
+But the herd7 output shows only the two variables mentioned in the
+"exists" clause. Someone modifying this test might wish to know the
+values of "x", "y", "0:r1", and "0:r3" as well. The "locations"
+statement on line 25 shows how to cause herd7 to display additional
+variables:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r1;
+ 8 int r2;
+ 9
+10 WRITE_ONCE(*x, 1);
+11 r1 = READ_ONCE(*x);
+12 r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17 int r3;
+18 int r4;
+19
+20 WRITE_ONCE(*y, 1);
+21 r3 = READ_ONCE(*y);
+22 r4 = READ_ONCE(*x);
+23 }
+24
+25 locations [0:r1; 1:r3; x; y]
+26 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output then displays the values of all the variables:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=0; x=1; y=1;
+ 4 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=1; x=1; y=1;
+ 5 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=0; x=1; y=1;
+ 6 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=1; x=1; y=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=40de8418c4b395388f6501cafd1ed38d
+
+What if you would like to know the value of a particular global variable
+at some particular point in a given process's execution? One approach
+is to use a READ_ONCE() to load that global variable into a new local
+variable, then add that local variable to the "locations" clause.
+But be careful: In some litmus tests, adding a READ_ONCE() will change
+the outcome! For one example, please see the C-READ_ONCE.litmus and
+C-READ_ONCE-omitted.litmus tests located here:
+
+ https://github.com/paulmckrcu/litmus/blob/master/manual/kernel/
+
+
+Spin Loops
+----------
+
+The analysis carried out by herd7 explores full state space, which is
+at best of exponential time complexity. Adding processes and increasing
+the amount of code in a give process can greatly increase execution time.
+Potentially infinite loops, such as those used to wait for locks to
+become available, are clearly problematic.
+
+Fortunately, it is possible to avoid state-space explosion by specially
+modeling such loops. For example, the following litmus tests emulates
+locking using xchg_acquire(), but instead of enclosing xchg_acquire()
+in a spin loop, it instead excludes executions that fail to acquire the
+lock using a herd7 "filter" clause. Note that for exclusive locking, you
+are better off using the spin_lock() and spin_unlock() that LKMM directly
+models, if for no other reason that these are much faster. However, the
+techniques illustrated in this section can be used for other purposes,
+such as emulating reader-writer locking, which LKMM does not yet model.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4 }
+ 5
+ 6 P0(int *sl, int *x0, int *x1)
+ 7 {
+ 8 int r2;
+ 9 int r1;
+10
+11 r2 = xchg_acquire(sl, 1);
+12 WRITE_ONCE(*x0, 1);
+13 r1 = READ_ONCE(*x1);
+14 smp_store_release(sl, 0);
+15 }
+16
+17 P1(int *sl, int *x0, int *x1)
+18 {
+19 int r2;
+20 int r1;
+21
+22 r2 = xchg_acquire(sl, 1);
+23 WRITE_ONCE(*x1, 1);
+24 r1 = READ_ONCE(*x0);
+25 smp_store_release(sl, 0);
+26 }
+27
+28 filter (0:r2=0 /\ 1:r2=0)
+29 exists (0:r1=0 /\ 1:r1=0)
+
+This litmus test may be found here:
+
+https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd/C-SB+l-o-o-u+l-o-o-u-X.litmus
+
+This test uses two global variables, "x1" and "x2", and also emulates a
+single global spinlock named "sl". This spinlock is held by whichever
+process changes the value of "sl" from "0" to "1", and is released when
+that process sets "sl" back to "0". P0()'s lock acquisition is emulated
+on line 11 using xchg_acquire(), which unconditionally stores the value
+"1" to "sl" and stores either "0" or "1" to "r2", depending on whether
+the lock acquisition was successful or unsuccessful (due to "sl" already
+having the value "1"), respectively. P1() operates in a similar manner.
+
+Rather unconventionally, execution appears to proceed to the critical
+section on lines 12 and 13 in either case. Line 14 then uses an
+smp_store_release() to store zero to "sl", thus emulating lock release.
+
+The case where xchg_acquire() fails to acquire the lock is handled by
+the "filter" clause on line 28, which tells herd7 to keep only those
+executions in which both "0:r2" and "1:r2" are zero, that is to pay
+attention only to those executions in which both locks are actually
+acquired. Thus, the bogus executions that would execute the critical
+sections are discarded and any effects that they might have had are
+ignored. Note well that the "filter" clause keeps those executions
+for which its expression is satisfied, that is, for which the expression
+evaluates to true. In other words, the "filter" clause says what to
+keep, not what to discard.
+
+The result of running this test is as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 2
+ 3 0:r1=0; 1:r1=1;
+ 4 0:r1=1; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r1=0 /\ 1:r1=0)
+ 9 Observation C-SB+l-o-o-u+l-o-o-u-X Never 0 2
+10 Time C-SB+l-o-o-u+l-o-o-u-X 0.03
+
+The "Never" on line 9 indicates that this use of xchg_acquire() and
+smp_store_release() really does correctly emulate locking.
+
+Why doesn't the litmus test take the simpler approach of using a spin loop
+to handle failed spinlock acquisitions, like the kernel does? The key
+insight behind this litmus test is that spin loops have no effect on the
+possible "exists"-clause outcomes of program execution in the absence
+of deadlock. In other words, given a high-quality lock-acquisition
+primitive in a deadlock-free program running on high-quality hardware,
+each lock acquisition will eventually succeed. Because herd7 already
+explores the full state space, the length of time required to actually
+acquire the lock does not matter. After all, herd7 already models all
+possible durations of the xchg_acquire() statements.
+
+Why not just add the "filter" clause to the "exists" clause, thus
+avoiding the "filter" clause entirely? This does work, but is slower.
+The reason that the "filter" clause is faster is that (in the common case)
+herd7 knows to abandon an execution as soon as the "filter" expression
+fails to be satisfied. In contrast, the "exists" clause is evaluated
+only at the end of time, thus requiring herd7 to waste time on bogus
+executions in which both critical sections proceed concurrently. In
+addition, some LKMM users like the separation of concerns provided by
+using the both the "filter" and "exists" clauses.
+
+Readers lacking a pathological interest in odd corner cases should feel
+free to skip the remainder of this section.
+
+But what if the litmus test were to temporarily set "0:r2" to a non-zero
+value? Wouldn't that cause herd7 to abandon the execution prematurely
+due to an early mismatch of the "filter" clause?
+
+Why not just try it? Line 4 of the following modified litmus test
+introduces a new global variable "x2" that is initialized to "1". Line 23
+of P1() reads that variable into "1:r2" to force an early mismatch with
+the "filter" clause. Line 24 does a known-true "if" condition to avoid
+and static analysis that herd7 might do. Finally the "exists" clause
+on line 32 is updated to a condition that is alway satisfied at the end
+of the test.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4 x2=1;
+ 5 }
+ 6
+ 7 P0(int *sl, int *x0, int *x1)
+ 8 {
+ 9 int r2;
+10 int r1;
+11
+12 r2 = xchg_acquire(sl, 1);
+13 WRITE_ONCE(*x0, 1);
+14 r1 = READ_ONCE(*x1);
+15 smp_store_release(sl, 0);
+16 }
+17
+18 P1(int *sl, int *x0, int *x1, int *x2)
+19 {
+20 int r2;
+21 int r1;
+22
+23 r2 = READ_ONCE(*x2);
+24 if (r2)
+25 r2 = xchg_acquire(sl, 1);
+26 WRITE_ONCE(*x1, 1);
+27 r1 = READ_ONCE(*x0);
+28 smp_store_release(sl, 0);
+29 }
+30
+31 filter (0:r2=0 /\ 1:r2=0)
+32 exists (x1=1)
+
+If the "filter" clause were to check each variable at each point in the
+execution, running this litmus test would display no executions because
+all executions would be filtered out at line 23. However, the output
+is instead as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 1
+ 3 x1=1;
+ 4 Ok
+ 5 Witnesses
+ 6 Positive: 2 Negative: 0
+ 7 Condition exists (x1=1)
+ 8 Observation C-SB+l-o-o-u+l-o-o-u-X Always 2 0
+ 9 Time C-SB+l-o-o-u+l-o-o-u-X 0.04
+10 Hash=080bc508da7f291e122c6de76c0088e3
+
+Line 3 shows that there is one execution that did not get filtered out,
+so the "filter" clause is evaluated only on the last assignment to
+the variables that it checks. In this case, the "filter" clause is a
+disjunction, so it might be evaluated twice, once at the final (and only)
+assignment to "0:r2" and once at the final assignment to "1:r2".
+
+
+Linked Lists
+------------
+
+LKMM can handle linked lists, but only linked lists in which each node
+contains nothing except a pointer to the next node in the list. This is
+of course quite restrictive, but there is nevertheless quite a bit that
+can be done within these confines, as can be seen in the litmus test
+at tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus:
+
+ 1 C MP+onceassign+derefonce
+ 2
+ 3 {
+ 4 y=z;
+ 5 z=0;
+ 6 }
+ 7
+ 8 P0(int *x, int **y)
+ 9 {
+10 WRITE_ONCE(*x, 1);
+11 rcu_assign_pointer(*y, x);
+12 }
+13
+14 P1(int *x, int **y)
+15 {
+16 int *r0;
+17 int r1;
+18
+19 rcu_read_lock();
+20 r0 = rcu_dereference(*y);
+21 r1 = READ_ONCE(*r0);
+22 rcu_read_unlock();
+23 }
+24
+25 exists (1:r0=x /\ 1:r1=0)
+
+Line 4's "y=z" may seem odd, given that "z" has not yet been initialized.
+But "y=z" does not set the value of "y" to that of "z", but instead
+sets the value of "y" to the *address* of "z". Lines 4 and 5 therefore
+create a simple linked list, with "y" pointing to "z" and "z" having a
+NULL pointer. A much longer linked list could be created if desired,
+and circular singly linked lists can also be created and manipulated.
+
+The "exists" clause works the same way, with the "1:r0=x" comparing P1()'s
+"r0" not to the value of "x", but again to its address. This term of the
+"exists" clause therefore tests whether line 20's load from "y" saw the
+value stored by line 11, which is in fact what is required in this case.
+
+P0()'s line 10 initializes "x" to the value 1 then line 11 links to "x"
+from "y", replacing "z".
+
+P1()'s line 20 loads a pointer from "y", and line 21 dereferences that
+pointer. The RCU read-side critical section spanning lines 19-22 is just
+for show in this example. Note that the address used for line 21's load
+depends on (in this case, "is exactly the same as") the value loaded by
+line 20. This is an example of what is called an "address dependency".
+This particular address dependency extends from the load on line 20 to the
+load on line 21. Address dependencies provide a weak form of ordering.
+
+Running this test results in the following:
+
+ 1 Test MP+onceassign+derefonce Allowed
+ 2 States 2
+ 3 1:r0=x; 1:r1=1;
+ 4 1:r0=z; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (1:r0=x /\ 1:r1=0)
+ 9 Observation MP+onceassign+derefonce Never 0 2
+10 Time MP+onceassign+derefonce 0.00
+11 Hash=49ef7a741563570102448a256a0c8568
+
+The only possible outcomes feature P1() loading a pointer to "z"
+(which contains zero) on the one hand and P1() loading a pointer to "x"
+(which contains the value one) on the other. This should be reassuring
+because it says that RCU readers cannot see the old preinitialization
+values when accessing a newly inserted list node. This undesirable
+scenario is flagged by the "exists" clause, and would occur if P1()
+loaded a pointer to "x", but obtained the pre-initialization value of
+zero after dereferencing that pointer.
+
+
+Comments
+--------
+
+Different portions of a litmus test are processed by different parsers,
+which has the charming effect of requiring different comment syntax in
+different portions of the litmus test. The C-syntax portions use
+C-language comments (either "/* */" or "//"), while the other portions
+use Ocaml comments "(* *)".
+
+The following litmus test illustrates the comment style corresponding
+to each syntactic unit of the test:
+
+ 1 C MP+onceassign+derefonce (* A *)
+ 2
+ 3 (* B *)
+ 4
+ 5 {
+ 6 y=z; (* C *)
+ 7 z=0;
+ 8 } // D
+ 9
+10 // E
+11
+12 P0(int *x, int **y) // F
+13 {
+14 WRITE_ONCE(*x, 1); // G
+15 rcu_assign_pointer(*y, x);
+16 }
+17
+18 // H
+19
+20 P1(int *x, int **y)
+21 {
+22 int *r0;
+23 int r1;
+24
+25 rcu_read_lock();
+26 r0 = rcu_dereference(*y);
+27 r1 = READ_ONCE(*r0);
+28 rcu_read_unlock();
+29 }
+30
+31 // I
+32
+33 exists (* J *) (1:r0=x /\ (* K *) 1:r1=0) (* L *)
+
+In short, use C-language comments in the C code and Ocaml comments in
+the rest of the litmus test.
+
+On the other hand, if you prefer C-style comments everywhere, the
+C preprocessor is your friend.
+
+
+Asynchronous RCU Grace Periods
+------------------------------
+
+The following litmus test is derived from the example show in
+Documentation/litmus-tests/rcu/RCU+sync+free.litmus, but converted to
+emulate call_rcu():
+
+ 1 C RCU+sync+free
+ 2
+ 3 {
+ 4 int x = 1;
+ 5 int *y = &x;
+ 6 int z = 1;
+ 7 }
+ 8
+ 9 P0(int *x, int *z, int **y)
+10 {
+11 int *r0;
+12 int r1;
+13
+14 rcu_read_lock();
+15 r0 = rcu_dereference(*y);
+16 r1 = READ_ONCE(*r0);
+17 rcu_read_unlock();
+18 }
+19
+20 P1(int *z, int **y, int *c)
+21 {
+22 rcu_assign_pointer(*y, z);
+23 smp_store_release(*c, 1); // Emulate call_rcu().
+24 }
+25
+26 P2(int *x, int *z, int **y, int *c)
+27 {
+28 int r0;
+29
+30 r0 = smp_load_acquire(*c); // Note call_rcu() request.
+31 synchronize_rcu(); // Wait one grace period.
+32 WRITE_ONCE(*x, 0); // Emulate the RCU callback.
+33 }
+34
+35 filter (2:r0=1) (* Reject too-early starts. *)
+36 exists (0:r0=x /\ 0:r1=0)
+
+Lines 4-6 initialize a linked list headed by "y" that initially contains
+"x". In addition, "z" is pre-initialized to prepare for P1(), which
+will replace "x" with "z" in this list.
+
+P0() on lines 9-18 enters an RCU read-side critical section, loads the
+list header "y" and dereferences it, leaving the node in "0:r0" and
+the node's value in "0:r1".
+
+P1() on lines 20-24 updates the list header to instead reference "z",
+then emulates call_rcu() by doing a release store into "c".
+
+P2() on lines 27-33 emulates the behind-the-scenes effect of doing a
+call_rcu(). Line 30 first does an acquire load from "c", then line 31
+waits for an RCU grace period to elapse, and finally line 32 emulates
+the RCU callback, which in turn emulates a call to kfree().
+
+Of course, it is possible for P2() to start too soon, so that the
+value of "2:r0" is zero rather than the required value of "1".
+The "filter" clause on line 35 handles this possibility, rejecting
+all executions in which "2:r0" is not equal to the value "1".
+
+
+Performance
+-----------
+
+LKMM's exploration of the full state-space can be extremely helpful,
+but it does not come for free. The price is exponential computational
+complexity in terms of the number of processes, the average number
+of statements in each process, and the total number of stores in the
+litmus test.
+
+So it is best to start small and then work up. Where possible, break
+your code down into small pieces each representing a core concurrency
+requirement.
+
+That said, herd7 is quite fast. On an unprepossessing x86 laptop, it
+was able to analyze the following 10-process RCU litmus test in about
+six seconds.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R.litmus
+
+One way to make herd7 run faster is to use the "-speedcheck true" option.
+This option prevents herd7 from generating all possible end states,
+instead causing it to focus solely on whether or not the "exists"
+clause can be satisfied. With this option, herd7 evaluates the above
+litmus test in about 300 milliseconds, for more than an order of magnitude
+improvement in performance.
+
+Larger 16-process litmus tests that would normally consume 15 minutes
+of time complete in about 40 seconds with this option. To be fair,
+you do get an extra 65,535 states when you leave off the "-speedcheck
+true" option.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R.litmus
+
+Nevertheless, litmus-test analysis really is of exponential complexity,
+whether with or without "-speedcheck true". Increasing by just three
+processes to a 19-process litmus test requires 2 hours and 40 minutes
+without, and about 8 minutes with "-speedcheck true". Each of these
+results represent roughly an order of magnitude slowdown compared to the
+16-process litmus test. Again, to be fair, the multi-hour run explores
+no fewer than 524,287 additional states compared to the shorter one.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R.litmus
+
+If you don't like command-line arguments, you can obtain a similar speedup
+by adding a "filter" clause with exactly the same expression as your
+"exists" clause.
+
+However, please note that seeing the full set of states can be extremely
+helpful when developing and debugging litmus tests.
+
+
+LIMITATIONS
+===========
+
+Limitations of the Linux-kernel memory model (LKMM) include:
+
+1. Compiler optimizations are not accurately modeled. Of course,
+ the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
+ ability to optimize, but under some circumstances it is possible
+ for the compiler to undermine the memory model. For more
+ information, see Documentation/explanation.txt (in particular,
+ the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
+ sections).
+
+ Note that this limitation in turn limits LKMM's ability to
+ accurately model address, control, and data dependencies.
+ For example, if the compiler can deduce the value of some variable
+ carrying a dependency, then the compiler can break that dependency
+ by substituting a constant of that value.
+
+2. Multiple access sizes for a single variable are not supported,
+ and neither are misaligned or partially overlapping accesses.
+
+3. Exceptions and interrupts are not modeled. In some cases,
+ this limitation can be overcome by modeling the interrupt or
+ exception with an additional process.
+
+4. I/O such as MMIO or DMA is not supported.
+
+5. Self-modifying code (such as that found in the kernel's
+ alternatives mechanism, function tracer, Berkeley Packet Filter
+ JIT compiler, and module loader) is not supported.
+
+6. Complete modeling of all variants of atomic read-modify-write
+ operations, locking primitives, and RCU is not provided.
+ For example, call_rcu() and rcu_barrier() are not supported.
+ However, a substantial amount of support is provided for these
+ operations, as shown in the linux-kernel.def file.
+
+ Here are specific limitations:
+
+ a. When rcu_assign_pointer() is passed NULL, the Linux
+ kernel provides no ordering, but LKMM models this
+ case as a store release.
+
+ b. The "unless" RMW operations are not currently modeled:
+ atomic_long_add_unless(), atomic_inc_unless_negative(),
+ and atomic_dec_unless_positive(). These can be emulated
+ in litmus tests, for example, by using atomic_cmpxchg().
+
+ One exception of this limitation is atomic_add_unless(),
+ which is provided directly by herd7 (so no corresponding
+ definition in linux-kernel.def). atomic_add_unless() is
+ modeled by herd7 therefore it can be used in litmus tests.
+
+ c. The call_rcu() function is not modeled. As was shown above,
+ it can be emulated in litmus tests by adding another
+ process that invokes synchronize_rcu() and the body of the
+ callback function, with (for example) a release-acquire
+ from the site of the emulated call_rcu() to the beginning
+ of the additional process.
+
+ d. The rcu_barrier() function is not modeled. It can be
+ emulated in litmus tests emulating call_rcu() via
+ (for example) a release-acquire from the end of each
+ additional call_rcu() process to the site of the
+ emulated rcu-barrier().
+
+ e. Although sleepable RCU (SRCU) is now modeled, there
+ are some subtle differences between its semantics and
+ those in the Linux kernel. For example, the kernel
+ might interpret the following sequence as two partially
+ overlapping SRCU read-side critical sections:
+
+ 1 r1 = srcu_read_lock(&my_srcu);
+ 2 do_something_1();
+ 3 r2 = srcu_read_lock(&my_srcu);
+ 4 do_something_2();
+ 5 srcu_read_unlock(&my_srcu, r1);
+ 6 do_something_3();
+ 7 srcu_read_unlock(&my_srcu, r2);
+
+ In contrast, LKMM will interpret this as a nested pair of
+ SRCU read-side critical sections, with the outer critical
+ section spanning lines 1-7 and the inner critical section
+ spanning lines 3-5.
+
+ This difference would be more of a concern had anyone
+ identified a reasonable use case for partially overlapping
+ SRCU read-side critical sections. For more information
+ on the trickiness of such overlapping, please see:
+ https://paulmck.livejournal.com/40593.html
+
+ f. Reader-writer locking is not modeled. It can be
+ emulated in litmus tests using atomic read-modify-write
+ operations.
+
+The fragment of the C language supported by these litmus tests is quite
+limited and in some ways non-standard:
+
+1. There is no automatic C-preprocessor pass. You can of course
+ run it manually, if you choose.
+
+2. There is no way to create functions other than the Pn() functions
+ that model the concurrent processes.
+
+3. The Pn() functions' formal parameters must be pointers to the
+ global shared variables. Nothing can be passed by value into
+ these functions.
+
+4. The only functions that can be invoked are those built directly
+ into herd7 or that are defined in the linux-kernel.def file.
+
+5. The "switch", "do", "for", "while", and "goto" C statements are
+ not supported. The "switch" statement can be emulated by the
+ "if" statement. The "do", "for", and "while" statements can
+ often be emulated by manually unrolling the loop, or perhaps by
+ enlisting the aid of the C preprocessor to minimize the resulting
+ code duplication. Some uses of "goto" can be emulated by "if",
+ and some others by unrolling.
+
+6. Although you can use a wide variety of types in litmus-test
+ variable declarations, and especially in global-variable
+ declarations, the "herd7" tool understands only int and
+ pointer types. There is no support for floating-point types,
+ enumerations, characters, strings, arrays, or structures.
+
+7. Parsing of variable declarations is very loose, with almost no
+ type checking.
+
+8. Initializers differ from their C-language counterparts.
+ For example, when an initializer contains the name of a shared
+ variable, that name denotes a pointer to that variable, not
+ the current value of that variable. For example, "int x = y"
+ is interpreted the way "int x = &y" would be in C.
+
+9. Dynamic memory allocation is not supported, although this can
+ be worked around in some cases by supplying multiple statically
+ allocated variables.
+
+Some of these limitations may be overcome in the future, but others are
+more likely to be addressed by incorporating the Linux-kernel memory model
+into other tools.
+
+Finally, please note that LKMM is subject to change as hardware, use cases,
+and compilers evolve.
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index 63c4adfed884..03f58b11c252 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -1,7 +1,7 @@
This document provides "recipes", that is, litmus tests for commonly
occurring situations, as well as a few that illustrate subtly broken but
attractive nuisances. Many of these recipes include example code from
-v4.13 of the Linux kernel.
+v5.7 of the Linux kernel.
The first section covers simple special cases, the second section
takes off the training wheels to cover more involved examples,
@@ -278,7 +278,7 @@ is present if the value loaded determines the address of a later access
first place (control dependency). Note that the term "data dependency"
is sometimes casually used to cover both address and data dependencies.
-In lib/prime_numbers.c, the expand_to_next_prime() function invokes
+In lib/math/prime_numbers.c, the expand_to_next_prime() function invokes
rcu_assign_pointer(), and the next_prime_number() function invokes
rcu_dereference(). This combination mediates access to a bit vector
that is expanded as additional primes are needed.
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index ecbbaa5396d4..c5fdfd19df24 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -120,7 +120,7 @@ o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding
o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and
semantics of the weak consistency model specification language
- cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531
+ cat". CoRR abs/1608.07531 (2016). https://arxiv.org/abs/1608.07531
Memory-model comparisons
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
new file mode 100644
index 000000000000..81e1a0ec5342
--- /dev/null
+++ b/tools/memory-model/Documentation/simple.txt
@@ -0,0 +1,271 @@
+This document provides options for those wishing to keep their
+memory-ordering lives simple, as is necessary for those whose domain
+is complex. After all, there are bugs other than memory-ordering bugs,
+and the time spent gaining memory-ordering knowledge is not available
+for gaining domain knowledge. Furthermore Linux-kernel memory model
+(LKMM) is quite complex, with subtle differences in code often having
+dramatic effects on correctness.
+
+The options near the beginning of this list are quite simple. The idea
+is not that kernel hackers don't already know about them, but rather
+that they might need the occasional reminder.
+
+Please note that this is a generic guide, and that specific subsystems
+will often have special requirements or idioms. For example, developers
+of MMIO-based device drivers will often need to use mb(), rmb(), and
+wmb(), and therefore might find smp_mb(), smp_rmb(), and smp_wmb()
+to be more natural than smp_load_acquire() and smp_store_release().
+On the other hand, those coming in from other environments will likely
+be more familiar with these last two.
+
+
+Single-threaded code
+====================
+
+In single-threaded code, there is no reordering, at least assuming
+that your toolchain and hardware are working correctly. In addition,
+it is generally a mistake to assume your code will only run in a single
+threaded context as the kernel can enter the same code path on multiple
+CPUs at the same time. One important exception is a function that makes
+no external data references.
+
+In the general case, you will need to take explicit steps to ensure that
+your code really is executed within a single thread that does not access
+shared variables. A simple way to achieve this is to define a global lock
+that you acquire at the beginning of your code and release at the end,
+taking care to ensure that all references to your code's shared data are
+also carried out under that same lock. Because only one thread can hold
+this lock at a given time, your code will be executed single-threaded.
+This approach is called "code locking".
+
+Code locking can severely limit both performance and scalability, so it
+should be used with caution, and only on code paths that execute rarely.
+After all, a huge amount of effort was required to remove the Linux
+kernel's old "Big Kernel Lock", so let's please be very careful about
+adding new "little kernel locks".
+
+One of the advantages of locking is that, in happy contrast with the
+year 1981, almost all kernel developers are very familiar with locking.
+The Linux kernel's lockdep (CONFIG_PROVE_LOCKING=y) is very helpful with
+the formerly feared deadlock scenarios.
+
+Please use the standard locking primitives provided by the kernel rather
+than rolling your own. For one thing, the standard primitives interact
+properly with lockdep. For another thing, these primitives have been
+tuned to deal better with high contention. And for one final thing, it is
+surprisingly hard to correctly code production-quality lock acquisition
+and release functions. After all, even simple non-production-quality
+locking functions must carefully prevent both the CPU and the compiler
+from moving code in either direction across the locking function.
+
+Despite the scalability limitations of single-threaded code, RCU
+takes this approach for much of its grace-period processing and also
+for early-boot operation. The reason RCU is able to scale despite
+single-threaded grace-period processing is use of batching, where all
+updates that accumulated during one grace period are handled by the
+next one. In other words, slowing down grace-period processing makes
+it more efficient. Nor is RCU unique: Similar batching optimizations
+are used in many I/O operations.
+
+
+Packaged code
+=============
+
+Even if performance and scalability concerns prevent your code from
+being completely single-threaded, it is often possible to use library
+functions that handle the concurrency nearly or entirely on their own.
+This approach delegates any LKMM worries to the library maintainer.
+
+In the kernel, what is the "library"? Quite a bit. It includes the
+contents of the lib/ directory, much of the include/linux/ directory along
+with a lot of other heavily used APIs. But heavily used examples include
+the list macros (for example, include/linux/{,rcu}list.h), workqueues,
+smp_call_function(), and the various hash tables and search trees.
+
+
+Data locking
+============
+
+With code locking, we use single-threaded code execution to guarantee
+serialized access to the data that the code is accessing. However,
+we can also achieve this by instead associating the lock with specific
+instances of the data structures. This creates a "critical section"
+in the code execution that will execute as though it is single threaded.
+By placing all the accesses and modifications to a shared data structure
+inside a critical section, we ensure that the execution context that
+holds the lock has exclusive access to the shared data.
+
+The poster boy for this approach is the hash table, where placing a lock
+in each hash bucket allows operations on different buckets to proceed
+concurrently. This works because the buckets do not overlap with each
+other, so that an operation on one bucket does not interfere with any
+other bucket.
+
+As the number of buckets increases, data locking scales naturally.
+In particular, if the amount of data increases with the number of CPUs,
+increasing the number of buckets as the number of CPUs increase results
+in a naturally scalable data structure.
+
+
+Per-CPU processing
+==================
+
+Partitioning processing and data over CPUs allows each CPU to take
+a single-threaded approach while providing excellent performance and
+scalability. Of course, there is no free lunch: The dark side of this
+excellence is substantially increased memory footprint.
+
+In addition, it is sometimes necessary to occasionally update some global
+view of this processing and data, in which case something like locking
+must be used to protect this global view. This is the approach taken
+by the percpu_counter infrastructure. In many cases, there are already
+generic/library variants of commonly used per-cpu constructs available.
+Please use them rather than rolling your own.
+
+RCU uses DEFINE_PER_CPU*() declaration to create a number of per-CPU
+data sets. For example, each CPU does private quiescent-state processing
+within its instance of the per-CPU rcu_data structure, and then uses data
+locking to report quiescent states up the grace-period combining tree.
+
+
+Packaged primitives: Sequence locking
+=====================================
+
+Lockless programming is considered by many to be more difficult than
+lock-based programming, but there are a few lockless design patterns that
+have been built out into an API. One of these APIs is sequence locking.
+Although this APIs can be used in extremely complex ways, there are simple
+and effective ways of using it that avoid the need to pay attention to
+memory ordering.
+
+The basic keep-things-simple rule for sequence locking is "do not write
+in read-side code". Yes, you can do writes from within sequence-locking
+readers, but it won't be so simple. For example, such writes will be
+lockless and should be idempotent.
+
+For more sophisticated use cases, LKMM can guide you, including use
+cases involving combining sequence locking with other synchronization
+primitives. (LKMM does not yet know about sequence locking, so it is
+currently necessary to open-code it in your litmus tests.)
+
+Additional information may be found in include/linux/seqlock.h.
+
+Packaged primitives: RCU
+========================
+
+Another lockless design pattern that has been baked into an API
+is RCU. The Linux kernel makes sophisticated use of RCU, but the
+keep-things-simple rules for RCU are "do not write in read-side code"
+and "do not update anything that is visible to and accessed by readers",
+and "protect updates with locking".
+
+These rules are illustrated by the functions foo_update_a() and
+foo_get_a() shown in Documentation/RCU/whatisRCU.rst. Additional
+RCU usage patterns maybe found in Documentation/RCU and in the
+source code.
+
+
+Packaged primitives: Atomic operations
+======================================
+
+Back in the day, the Linux kernel had three types of atomic operations:
+
+1. Initialization and read-out, such as atomic_set() and atomic_read().
+
+2. Operations that did not return a value and provided no ordering,
+ such as atomic_inc() and atomic_dec().
+
+3. Operations that returned a value and provided full ordering, such as
+ atomic_add_return() and atomic_dec_and_test(). Note that some
+ value-returning operations provide full ordering only conditionally.
+ For example, cmpxchg() provides ordering only upon success.
+
+More recent kernels have operations that return a value but do not
+provide full ordering. These are flagged with either a _relaxed()
+suffix (providing no ordering), or an _acquire() or _release() suffix
+(providing limited ordering).
+
+Additional information may be found in these files:
+
+Documentation/atomic_t.txt
+Documentation/atomic_bitops.txt
+Documentation/core-api/atomic_ops.rst
+Documentation/core-api/refcount-vs-atomic.rst
+
+Reading code using these primitives is often also quite helpful.
+
+
+Lockless, fully ordered
+=======================
+
+When using locking, there often comes a time when it is necessary
+to access some variable or another without holding the data lock
+that serializes access to that variable.
+
+If you want to keep things simple, use the initialization and read-out
+operations from the previous section only when there are no racing
+accesses. Otherwise, use only fully ordered operations when accessing
+or modifying the variable. This approach guarantees that code prior
+to a given access to that variable will be seen by all CPUs has having
+happened before any code following any later access to that same variable.
+
+Please note that per-CPU functions are not atomic operations and
+hence they do not provide any ordering guarantees at all.
+
+If the lockless accesses are frequently executed reads that are used
+only for heuristics, or if they are frequently executed writes that
+are used only for statistics, please see the next section.
+
+
+Lockless statistics and heuristics
+==================================
+
+Unordered primitives such as atomic_read(), atomic_set(), READ_ONCE(), and
+WRITE_ONCE() can safely be used in some cases. These primitives provide
+no ordering, but they do prevent the compiler from carrying out a number
+of destructive optimizations (for which please see the next section).
+One example use for these primitives is statistics, such as per-CPU
+counters exemplified by the rt_cache_stat structure's routing-cache
+statistics counters. Another example use case is heuristics, such as
+the jiffies_till_first_fqs and jiffies_till_next_fqs kernel parameters
+controlling how often RCU scans for idle CPUs.
+
+But be careful. "Unordered" really does mean "unordered". It is all
+too easy to assume ordering, and this assumption must be avoided when
+using these primitives.
+
+
+Don't let the compiler trip you up
+==================================
+
+It can be quite tempting to use plain C-language accesses for lockless
+loads from and stores to shared variables. Although this is both
+possible and quite common in the Linux kernel, it does require a
+surprising amount of analysis, care, and knowledge about the compiler.
+Yes, some decades ago it was not unfair to consider a C compiler to be
+an assembler with added syntax and better portability, but the advent of
+sophisticated optimizing compilers mean that those days are long gone.
+Today's optimizing compilers can profoundly rewrite your code during the
+translation process, and have long been ready, willing, and able to do so.
+
+Therefore, if you really need to use C-language assignments instead of
+READ_ONCE(), WRITE_ONCE(), and so on, you will need to have a very good
+understanding of both the C standard and your compiler. Here are some
+introductory references and some tooling to start you on this noble quest:
+
+Who's afraid of a big bad optimizing compiler?
+ https://lwn.net/Articles/793253/
+Calibrating your fear of big bad optimizing compilers
+ https://lwn.net/Articles/799218/
+Concurrency bugs should fear the big bad data-race detector (part 1)
+ https://lwn.net/Articles/816850/
+Concurrency bugs should fear the big bad data-race detector (part 2)
+ https://lwn.net/Articles/816854/
+
+
+More complex use cases
+======================
+
+If the alternatives above do not do what you need, please look at the
+recipes-pairs.txt file to peel off the next layer of the memory-ordering
+onion.
diff --git a/tools/memory-model/README b/tools/memory-model/README
index ecb7385376bf..c8144d4aafa0 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -63,10 +63,32 @@ BASIC USAGE: HERD7
==================
The memory model is used, in conjunction with "herd7", to exhaustively
-explore the state space of small litmus tests.
+explore the state space of small litmus tests. Documentation describing
+the format, features, capabilities and limitations of these litmus
+tests is available in tools/memory-model/Documentation/litmus-tests.txt.
-For example, to run SB+fencembonceonces.litmus against the memory model:
+Example litmus tests may be found in the Linux-kernel source tree:
+ tools/memory-model/litmus-tests/
+ Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available here:
+
+ https://github.com/paulmckrcu/litmus
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+
+Documentation describing litmus tests and now to use them may be found
+here:
+
+ tools/memory-model/Documentation/litmus-tests.txt
+
+The remainder of this section uses the SB+fencembonceonces.litmus test
+located in the tools/memory-model directory.
+
+To run SB+fencembonceonces.litmus against the memory model:
+
+ $ cd $LINUX_SOURCE_TREE/tools/memory-model
$ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus
Here is the corresponding output:
@@ -87,7 +109,11 @@ Here is the corresponding output:
The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
this litmus test's "exists" clause can not be satisfied.
-See "herd7 -help" or "herdtools7/doc/" for more information.
+See "herd7 -help" or "herdtools7/doc/" for more information on running the
+tool itself, but please be aware that this documentation is intended for
+people who work on the memory model itself, that is, people making changes
+to the tools/memory-model/linux-kernel.* files. It is not intended for
+people focusing on writing, understanding, and running LKMM litmus tests.
=====================
@@ -124,7 +150,11 @@ that during two million trials, the state specified in this litmus
test's "exists" clause was not reached.
And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/"
-for more information.
+for more information. And again, please be aware that this documentation
+is intended for people who work on the memory model itself, that is,
+people making changes to the tools/memory-model/linux-kernel.* files.
+It is not intended for people focusing on writing, understanding, and
+running LKMM litmus tests.
====================
@@ -137,12 +167,21 @@ Documentation/cheatsheet.txt
Documentation/explanation.txt
Describes the memory model in detail.
+Documentation/litmus-tests.txt
+ Describes the format, features, capabilities, and limitations
+ of the litmus tests that LKMM can evaluate.
+
Documentation/recipes.txt
Lists common memory-ordering patterns.
Documentation/references.txt
Provides background reading.
+Documentation/simple.txt
+ Starting point for someone new to Linux-kernel concurrency.
+ And also for those needing a reminder of the simpler approaches
+ to concurrency!
+
linux-kernel.bell
Categorizes the relevant instructions, including memory
references, memory barriers, atomic read-modify-write operations,
@@ -187,116 +226,3 @@ README
This file.
scripts Various scripts, see scripts/README.
-
-
-===========
-LIMITATIONS
-===========
-
-The Linux-kernel memory model (LKMM) has the following limitations:
-
-1. Compiler optimizations are not accurately modeled. Of course,
- the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
- ability to optimize, but under some circumstances it is possible
- for the compiler to undermine the memory model. For more
- information, see Documentation/explanation.txt (in particular,
- the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
- sections).
-
- Note that this limitation in turn limits LKMM's ability to
- accurately model address, control, and data dependencies.
- For example, if the compiler can deduce the value of some variable
- carrying a dependency, then the compiler can break that dependency
- by substituting a constant of that value.
-
-2. Multiple access sizes for a single variable are not supported,
- and neither are misaligned or partially overlapping accesses.
-
-3. Exceptions and interrupts are not modeled. In some cases,
- this limitation can be overcome by modeling the interrupt or
- exception with an additional process.
-
-4. I/O such as MMIO or DMA is not supported.
-
-5. Self-modifying code (such as that found in the kernel's
- alternatives mechanism, function tracer, Berkeley Packet Filter
- JIT compiler, and module loader) is not supported.
-
-6. Complete modeling of all variants of atomic read-modify-write
- operations, locking primitives, and RCU is not provided.
- For example, call_rcu() and rcu_barrier() are not supported.
- However, a substantial amount of support is provided for these
- operations, as shown in the linux-kernel.def file.
-
- a. When rcu_assign_pointer() is passed NULL, the Linux
- kernel provides no ordering, but LKMM models this
- case as a store release.
-
- b. The "unless" RMW operations are not currently modeled:
- atomic_long_add_unless(), atomic_inc_unless_negative(),
- and atomic_dec_unless_positive(). These can be emulated
- in litmus tests, for example, by using atomic_cmpxchg().
-
- One exception of this limitation is atomic_add_unless(),
- which is provided directly by herd7 (so no corresponding
- definition in linux-kernel.def). atomic_add_unless() is
- modeled by herd7 therefore it can be used in litmus tests.
-
- c. The call_rcu() function is not modeled. It can be
- emulated in litmus tests by adding another process that
- invokes synchronize_rcu() and the body of the callback
- function, with (for example) a release-acquire from
- the site of the emulated call_rcu() to the beginning
- of the additional process.
-
- d. The rcu_barrier() function is not modeled. It can be
- emulated in litmus tests emulating call_rcu() via
- (for example) a release-acquire from the end of each
- additional call_rcu() process to the site of the
- emulated rcu-barrier().
-
- e. Although sleepable RCU (SRCU) is now modeled, there
- are some subtle differences between its semantics and
- those in the Linux kernel. For example, the kernel
- might interpret the following sequence as two partially
- overlapping SRCU read-side critical sections:
-
- 1 r1 = srcu_read_lock(&my_srcu);
- 2 do_something_1();
- 3 r2 = srcu_read_lock(&my_srcu);
- 4 do_something_2();
- 5 srcu_read_unlock(&my_srcu, r1);
- 6 do_something_3();
- 7 srcu_read_unlock(&my_srcu, r2);
-
- In contrast, LKMM will interpret this as a nested pair of
- SRCU read-side critical sections, with the outer critical
- section spanning lines 1-7 and the inner critical section
- spanning lines 3-5.
-
- This difference would be more of a concern had anyone
- identified a reasonable use case for partially overlapping
- SRCU read-side critical sections. For more information,
- please see: https://paulmck.livejournal.com/40593.html
-
- f. Reader-writer locking is not modeled. It can be
- emulated in litmus tests using atomic read-modify-write
- operations.
-
-The "herd7" tool has some additional limitations of its own, apart from
-the memory model:
-
-1. Non-trivial data structures such as arrays or structures are
- not supported. However, pointers are supported, allowing trivial
- linked lists to be constructed.
-
-2. Dynamic memory allocation is not supported, although this can
- be worked around in some cases by supplying multiple statically
- allocated variables.
-
-Some of these limitations may be overcome in the future, but others are
-more likely to be addressed by incorporating the Linux-kernel memory model
-into other tools.
-
-Finally, please note that LKMM is subject to change as hardware, use cases,
-and compilers evolve.
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 42ac19e0299c..326ac390168b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -16,6 +16,7 @@
#include <linux/hashtable.h>
#include <linux/kernel.h>
+#include <linux/static_call_types.h>
#define FAKE_JUMP_OFFSET -1
@@ -433,6 +434,103 @@ reachable:
return 0;
}
+static int create_static_call_sections(struct objtool_file *file)
+{
+ struct section *sec, *reloc_sec;
+ struct reloc *reloc;
+ struct static_call_site *site;
+ struct instruction *insn;
+ struct symbol *key_sym;
+ char *key_name, *tmp;
+ int idx;
+
+ sec = find_section_by_name(file->elf, ".static_call_sites");
+ if (sec) {
+ INIT_LIST_HEAD(&file->static_call_list);
+ WARN("file already has .static_call_sites section, skipping");
+ return 0;
+ }
+
+ if (list_empty(&file->static_call_list))
+ return 0;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node)
+ idx++;
+
+ sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
+ sizeof(struct static_call_site), idx);
+ if (!sec)
+ return -1;
+
+ reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+ if (!reloc_sec)
+ return -1;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+
+ site = (struct static_call_site *)sec->data->d_buf + idx;
+ memset(site, 0, sizeof(struct static_call_site));
+
+ /* populate reloc for 'addr' */
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+ reloc->sym = insn->sec->sym;
+ reloc->addend = insn->offset;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(struct static_call_site);
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ /* find key symbol */
+ key_name = strdup(insn->call_dest->name);
+ if (!key_name) {
+ perror("strdup");
+ return -1;
+ }
+ if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
+ STATIC_CALL_TRAMP_PREFIX_LEN)) {
+ WARN("static_call: trampoline name malformed: %s", key_name);
+ return -1;
+ }
+ tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
+ memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN);
+
+ key_sym = find_symbol_by_name(file->elf, tmp);
+ if (!key_sym) {
+ WARN("static_call: can't find static_call_key symbol: %s", tmp);
+ return -1;
+ }
+ free(key_name);
+
+ /* populate reloc for 'key' */
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+ reloc->sym = key_sym;
+ reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(struct static_call_site) + 4;
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ idx++;
+ }
+
+ if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+ return -1;
+
+ return 0;
+}
+
/*
* Warnings shouldn't be reported for ignored functions.
*/
@@ -528,6 +626,61 @@ static const char *uaccess_safe_builtin[] = {
"__tsan_write4",
"__tsan_write8",
"__tsan_write16",
+ "__tsan_read_write1",
+ "__tsan_read_write2",
+ "__tsan_read_write4",
+ "__tsan_read_write8",
+ "__tsan_read_write16",
+ "__tsan_atomic8_load",
+ "__tsan_atomic16_load",
+ "__tsan_atomic32_load",
+ "__tsan_atomic64_load",
+ "__tsan_atomic8_store",
+ "__tsan_atomic16_store",
+ "__tsan_atomic32_store",
+ "__tsan_atomic64_store",
+ "__tsan_atomic8_exchange",
+ "__tsan_atomic16_exchange",
+ "__tsan_atomic32_exchange",
+ "__tsan_atomic64_exchange",
+ "__tsan_atomic8_fetch_add",
+ "__tsan_atomic16_fetch_add",
+ "__tsan_atomic32_fetch_add",
+ "__tsan_atomic64_fetch_add",
+ "__tsan_atomic8_fetch_sub",
+ "__tsan_atomic16_fetch_sub",
+ "__tsan_atomic32_fetch_sub",
+ "__tsan_atomic64_fetch_sub",
+ "__tsan_atomic8_fetch_and",
+ "__tsan_atomic16_fetch_and",
+ "__tsan_atomic32_fetch_and",
+ "__tsan_atomic64_fetch_and",
+ "__tsan_atomic8_fetch_or",
+ "__tsan_atomic16_fetch_or",
+ "__tsan_atomic32_fetch_or",
+ "__tsan_atomic64_fetch_or",
+ "__tsan_atomic8_fetch_xor",
+ "__tsan_atomic16_fetch_xor",
+ "__tsan_atomic32_fetch_xor",
+ "__tsan_atomic64_fetch_xor",
+ "__tsan_atomic8_fetch_nand",
+ "__tsan_atomic16_fetch_nand",
+ "__tsan_atomic32_fetch_nand",
+ "__tsan_atomic64_fetch_nand",
+ "__tsan_atomic8_compare_exchange_strong",
+ "__tsan_atomic16_compare_exchange_strong",
+ "__tsan_atomic32_compare_exchange_strong",
+ "__tsan_atomic64_compare_exchange_strong",
+ "__tsan_atomic8_compare_exchange_weak",
+ "__tsan_atomic16_compare_exchange_weak",
+ "__tsan_atomic32_compare_exchange_weak",
+ "__tsan_atomic64_compare_exchange_weak",
+ "__tsan_atomic8_compare_exchange_val",
+ "__tsan_atomic16_compare_exchange_val",
+ "__tsan_atomic32_compare_exchange_val",
+ "__tsan_atomic64_compare_exchange_val",
+ "__tsan_atomic_thread_fence",
+ "__tsan_atomic_signal_fence",
/* KCOV */
"write_comp_data",
"check_kcov_mode",
@@ -650,6 +803,10 @@ static int add_jump_destinations(struct objtool_file *file)
} else {
/* external sibling call */
insn->call_dest = reloc->sym;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
continue;
}
@@ -701,6 +858,10 @@ static int add_jump_destinations(struct objtool_file *file)
/* internal sibling call */
insn->call_dest = insn->jump_dest->func;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
}
}
}
@@ -1523,6 +1684,23 @@ static int read_intra_function_calls(struct objtool_file *file)
return 0;
}
+static int read_static_call_tramps(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->bind == STB_GLOBAL &&
+ !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+ strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
+ func->static_call_tramp = true;
+ }
+ }
+
+ return 0;
+}
+
static void mark_rodata(struct objtool_file *file)
{
struct section *sec;
@@ -1570,6 +1748,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_static_call_tramps(file);
+ if (ret)
+ return ret;
+
ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -2433,6 +2615,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (dead_end_function(file, insn->call_dest))
return 0;
+ if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
+
break;
case INSN_JUMP_CONDITIONAL:
@@ -2792,6 +2979,7 @@ int check(const char *_objname, bool orc)
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
+ INIT_LIST_HEAD(&file.static_call_list);
file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
file.ignore_unreachables = no_unreachable;
file.hints = false;
@@ -2839,6 +3027,11 @@ int check(const char *_objname, bool orc)
warnings += ret;
}
+ ret = create_static_call_sections(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
if (orc) {
ret = create_orc(&file);
if (ret < 0)
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 061aa96e15d3..36d38b9153ac 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -22,6 +22,7 @@ struct insn_state {
struct instruction {
struct list_head list;
struct hlist_node hash;
+ struct list_head static_call_node;
struct section *sec;
unsigned long offset;
unsigned int len;
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 3ddbd66f1a37..4e1d7460574b 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -652,7 +652,7 @@ err:
}
struct section *elf_create_section(struct elf *elf, const char *name,
- size_t entsize, int nr)
+ unsigned int sh_flags, size_t entsize, int nr)
{
struct section *sec, *shstrtab;
size_t size = entsize * nr;
@@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->sh.sh_entsize = entsize;
sec->sh.sh_type = SHT_PROGBITS;
sec->sh.sh_addralign = 1;
- sec->sh.sh_flags = SHF_ALLOC;
+ sec->sh.sh_flags = SHF_ALLOC | sh_flags;
/* Add section name to .shstrtab (or .strtab for Clang) */
@@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect
strcpy(relocname, ".rel");
strcat(relocname, base->name);
- sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0);
+ sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0);
free(relocname);
if (!sec)
return NULL;
@@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
strcpy(relocname, ".rela");
strcat(relocname, base->name);
- sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0);
+ sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
free(relocname);
if (!sec)
return NULL;
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 6cc80a075166..807f8c670097 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -56,6 +56,7 @@ struct symbol {
unsigned int len;
struct symbol *pfunc, *cfunc, *alias;
bool uaccess_safe;
+ bool static_call_tramp;
};
struct reloc {
@@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc)
}
struct elf *elf_open_read(const char *name, int flags);
-struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr);
+struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
void elf_add_reloc(struct elf *elf, struct reloc *reloc);
int elf_write_insn(struct elf *elf, struct section *sec,
diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h
index 528028a66816..9a7cd0b88bd8 100644
--- a/tools/objtool/objtool.h
+++ b/tools/objtool/objtool.h
@@ -16,6 +16,7 @@ struct objtool_file {
struct elf *elf;
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
+ struct list_head static_call_list;
bool ignore_unreachables, c_file, hints, rodata;
};
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 968f55e6dd94..e6b2363c2e03 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -177,7 +177,7 @@ int create_orc_sections(struct objtool_file *file)
/* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
- sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
if (!sec)
return -1;
@@ -186,7 +186,7 @@ int create_orc_sections(struct objtool_file *file)
return -1;
/* create .orc_unwind section */
- u_sec = elf_create_section(file->elf, ".orc_unwind",
+ u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
sizeof(struct orc_entry), idx);
/* populate sections */
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 2a1261bfbb62..aa099b21dffa 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -7,6 +7,7 @@ arch/x86/include/asm/orc_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
+include/linux/static_call_types.h
'
check_2 () {
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 3ca6fe057a0b..b168364ac050 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -32,7 +32,7 @@
18 spu oldstat sys_ni_syscall
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid
-21 nospu mount sys_mount compat_sys_mount
+21 nospu mount sys_mount
22 32 umount sys_oldumount
22 64 umount sys_ni_syscall
22 spu umount sys_ni_syscall
@@ -189,8 +189,8 @@
142 common _newselect sys_select compat_sys_select
143 common flock sys_flock
144 common msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
149 nospu _sysctl sys_ni_syscall
@@ -363,7 +363,7 @@
282 common unshare sys_unshare
283 common splice sys_splice
284 common tee sys_tee
-285 common vmsplice sys_vmsplice compat_sys_vmsplice
+285 common vmsplice sys_vmsplice
286 common openat sys_openat compat_sys_openat
287 common mkdirat sys_mkdirat
288 common mknodat sys_mknodat
@@ -443,8 +443,8 @@
348 common syncfs sys_syncfs
349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
350 common setns sys_setns
-351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+351 nospu process_vm_readv sys_process_vm_readv
+352 nospu process_vm_writev sys_process_vm_writev
353 nospu finit_module sys_finit_module
354 nospu kcmp sys_kcmp
355 common sched_setattr sys_sched_setattr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 6a0bbea225db..d2fa9647ce25 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -26,7 +26,7 @@
16 32 lchown - compat_sys_s390_lchown16
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid sys_getpid
-21 common mount sys_mount compat_sys_mount
+21 common mount sys_mount
22 common umount sys_oldumount compat_sys_oldumount
23 32 setuid - compat_sys_s390_setuid16
24 32 getuid - compat_sys_s390_getuid16
@@ -134,8 +134,8 @@
142 64 select sys_select -
143 common flock sys_flock sys_flock
144 common msync sys_msync compat_sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
149 common _sysctl - -
@@ -316,7 +316,7 @@
306 common splice sys_splice compat_sys_splice
307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
308 common tee sys_tee compat_sys_tee
-309 common vmsplice sys_vmsplice compat_sys_vmsplice
+309 common vmsplice sys_vmsplice sys_vmsplice
310 common move_pages sys_move_pages compat_sys_move_pages
311 common getcpu sys_getcpu compat_sys_getcpu
312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
@@ -347,8 +347,8 @@
337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
338 common syncfs sys_syncfs sys_syncfs
339 common setns sys_setns sys_setns
-340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev
342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
343 common kcmp sys_kcmp compat_sys_kcmp
344 common finit_module sys_finit_module compat_sys_finit_module
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index f30d6ae9a688..347809649ba2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,8 +371,8 @@
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
+515 x32 readv sys_readv
+516 x32 writev sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
@@ -388,15 +388,15 @@
529 x32 waitid compat_sys_waitid
530 x32 set_robust_list compat_sys_set_robust_list
531 x32 get_robust_list compat_sys_get_robust_list
-532 x32 vmsplice compat_sys_vmsplice
+532 x32 vmsplice sys_vmsplice
533 x32 move_pages compat_sys_move_pages
534 x32 preadv compat_sys_preadv64
535 x32 pwritev compat_sys_pwritev64
536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
537 x32 recvmmsg compat_sys_recvmmsg_time64
538 x32 sendmmsg compat_sys_sendmmsg
-539 x32 process_vm_readv compat_sys_process_vm_readv
-540 x32 process_vm_writev compat_sys_process_vm_writev
+539 x32 process_vm_readv sys_process_vm_readv
+540 x32 process_vm_writev sys_process_vm_writev
541 x32 setsockopt sys_setsockopt
542 x32 getsockopt sys_getsockopt
543 x32 io_setup compat_sys_io_setup
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 46ff97e909c6..1bc36a1db14f 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -171,7 +171,7 @@ class SystemValues:
tracefuncs = {
'sys_sync': {},
'ksys_sync': {},
- '__pm_notifier_call_chain': {},
+ 'pm_notifier_call_chain_robust': {},
'pm_prepare_console': {},
'pm_notifier_call_chain': {},
'freeze_processes': {},
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index e8a657a5f48a..384589095864 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
+#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
+#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -1131,6 +1133,220 @@ static int set_signal_handler(void)
return ret;
}
+struct test_membarrier_thread_args {
+ int stop;
+ intptr_t percpu_list_ptr;
+};
+
+/* Worker threads modify data in their "active" percpu lists. */
+void *test_membarrier_worker_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ const int iters = opt_reps;
+ int i;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Wait for initialization. */
+ while (!atomic_load(&args->percpu_list_ptr)) {}
+
+ for (i = 0; i < iters; ++i) {
+ int ret;
+
+ do {
+ int cpu = rseq_cpu_start();
+
+ ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
+ sizeof(struct percpu_list_entry) * cpu, 1, cpu);
+ } while (rseq_unlikely(ret));
+ }
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+void test_membarrier_init_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ memset(list, 0, sizeof(*list));
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list_node *node;
+
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = 0;
+ node->next = NULL;
+ list->c[i].head = node;
+ }
+}
+
+void test_membarrier_free_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++)
+ free(list->c[i].head);
+}
+
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+ return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
+/*
+ * The manager thread swaps per-cpu lists that worker threads see,
+ * and validates that there are no unexpected modifications.
+ */
+void *test_membarrier_manager_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ struct percpu_list list_a, list_b;
+ intptr_t expect_a = 0, expect_b = 0;
+ int cpu_a = 0, cpu_b = 0;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Init lists. */
+ test_membarrier_init_percpu_list(&list_a);
+ test_membarrier_init_percpu_list(&list_b);
+
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+
+ while (!atomic_load(&args->stop)) {
+ /* list_a is "active". */
+ cpu_a = rand() % CPU_SETSIZE;
+ /*
+ * As list_b is "inactive", we should never see changes
+ * to list_b.
+ */
+ if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_b "active". */
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
+ if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
+ errno != ENXIO /* missing CPU */) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /*
+ * Cpu A should now only modify list_b, so the values
+ * in list_a should be stable.
+ */
+ expect_a = atomic_load(&list_a.c[cpu_a].head->data);
+
+ cpu_b = rand() % CPU_SETSIZE;
+ /*
+ * As list_a is "inactive", we should never see changes
+ * to list_a.
+ */
+ if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_a "active". */
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+ if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
+ errno != ENXIO /* missing CPU*/) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /* Remember a value from list_b. */
+ expect_b = atomic_load(&list_b.c[cpu_b].head->data);
+ }
+
+ test_membarrier_free_percpu_list(&list_a);
+ test_membarrier_free_percpu_list(&list_b);
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+void test_membarrier(void)
+{
+ const int num_threads = opt_threads;
+ struct test_membarrier_thread_args thread_args;
+ pthread_t worker_threads[num_threads];
+ pthread_t manager_thread;
+ int i, ret;
+
+ if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
+ perror("sys_membarrier");
+ abort();
+ }
+
+ thread_args.stop = 0;
+ thread_args.percpu_list_ptr = 0;
+ ret = pthread_create(&manager_thread, NULL,
+ test_membarrier_manager_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&worker_threads[i], NULL,
+ test_membarrier_worker_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(worker_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ atomic_store(&thread_args.stop, 1);
+ ret = pthread_join(manager_thread, NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+}
+#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
+void test_membarrier(void)
+{
+ fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
+ "Skipping membarrier test.\n");
+}
+#endif
+
static void show_usage(int argc, char **argv)
{
printf("Usage : %s <OPTIONS>\n",
@@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv)
printf(" [-r N] Number of repetitions per thread (default 5000)\n");
printf(" [-d] Disable rseq system call (no initialization)\n");
printf(" [-D M] Disable rseq for each M threads\n");
- printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+ printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
printf(" [-v] Verbose output.\n");
printf(" [-h] Show this help.\n");
@@ -1268,6 +1484,7 @@ int main(int argc, char **argv)
case 'i':
case 'b':
case 'm':
+ case 'r':
break;
default:
show_usage(argc, argv);
@@ -1320,6 +1537,10 @@ int main(int argc, char **argv)
printf_verbose("counter increment\n");
test_percpu_inc();
break;
+ case 'r':
+ printf_verbose("membarrier\n");
+ test_membarrier();
+ break;
}
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index b2da6004fe30..640411518e46 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -279,6 +279,63 @@ error1:
#endif
}
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ * pval = *(ptr+off)
+ * *pval += inc;
+ */
+static inline __attribute__((always_inline))
+int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+#endif
+ /* get p+v */
+ "movq %[ptr], %%rbx\n\t"
+ "addq %[off], %%rbx\n\t"
+ /* get pv */
+ "movq (%%rbx), %%rcx\n\t"
+ /* *pv += inc */
+ "addq %[inc], (%%rcx)\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_abi] "r" (&__rseq_abi),
+ /* final store input */
+ [ptr] "m" (*ptr),
+ [off] "er" (off),
+ [inc] "er" (inc)
+ : "memory", "cc", "rax", "rbx", "rcx"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t newv2,
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
index e426304fd4a0..f51bc83c9e41 100755
--- a/tools/testing/selftests/rseq/run_param_test.sh
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -15,6 +15,7 @@ TEST_LIST=(
"-T m"
"-T m -M"
"-T i"
+ "-T r"
)
TEST_NAME=(
@@ -25,6 +26,7 @@ TEST_NAME=(
"memcpy"
"memcpy with barrier"
"increment"
+ "membarrier"
)
IFS="$OLDIFS"
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 998319553523..7161cfc2e60b 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -443,6 +443,68 @@ static void test_unexpected_base(void)
#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
+static void test_ptrace_write_gs_read_base(void)
+{
+ int status;
+ pid_t child = fork();
+
+ if (child < 0)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
+
+ printf("[RUN]\tARCH_SET_GS to 1\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ wait(&status);
+
+ if (WSTOPSIG(status) == SIGTRAP) {
+ unsigned long base;
+ unsigned long gs_offset = USER_REGS_OFFSET(gs);
+ unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+ /* Read the initial base. It should be 1. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+ if (base == 1) {
+ printf("[OK]\tGSBASE started at 1\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
+ }
+
+ printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
+
+ /* Poke an LDT selector into GS. */
+ if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
+ err(1, "PTRACE_POKEUSER");
+
+ /* And read the base. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+ if (base == 0 || base == 1) {
+ printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
+ }
+ }
+
+ ptrace(PTRACE_CONT, child, NULL, NULL);
+
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
+}
+
static void test_ptrace_write_gsbase(void)
{
int status;
@@ -517,6 +579,9 @@ static void test_ptrace_write_gsbase(void)
END:
ptrace(PTRACE_CONT, child, NULL, NULL);
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
}
int main()
@@ -526,6 +591,9 @@ int main()
shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ /* Do these tests before we have an LDT. */
+ test_ptrace_write_gs_read_base();
+
/* Probe FSGSBASE */
sethandler(SIGILL, sigill, 0);
if (sigsetjmp(jmpbuf, 1) == 0) {