diff options
Diffstat (limited to 'tools')
29 files changed, 2377 insertions, 315 deletions
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 1a303a81aeef..90c3155f05b1 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -23,17 +23,17 @@ #include <sys/ioctl.h> #include <sys/types.h> #include <linux/gpio.h> +#include "gpio-utils.h" int monitor_device(const char *device_name, - unsigned int line, - uint32_t handleflags, - uint32_t eventflags, + unsigned int *lines, + unsigned int num_lines, + struct gpio_v2_line_config *config, unsigned int loops) { - struct gpioevent_request req; - struct gpiohandle_data data; + struct gpio_v2_line_values values; char *chrdev_name; - int fd; + int cfd, lfd; int ret; int i = 0; @@ -41,44 +41,55 @@ int monitor_device(const char *device_name, if (ret < 0) return -ENOMEM; - fd = open(chrdev_name, 0); - if (fd == -1) { + cfd = open(chrdev_name, 0); + if (cfd == -1) { ret = -errno; fprintf(stderr, "Failed to open %s\n", chrdev_name); goto exit_free_name; } - req.lineoffset = line; - req.handleflags = handleflags; - req.eventflags = eventflags; - strcpy(req.consumer_label, "gpio-event-mon"); - - ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req); - if (ret == -1) { - ret = -errno; - fprintf(stderr, "Failed to issue GET EVENT " - "IOCTL (%d)\n", - ret); - goto exit_close_error; - } + ret = gpiotools_request_line(device_name, lines, num_lines, config, + "gpio-event-mon"); + if (ret < 0) + goto exit_device_close; + else + lfd = ret; /* Read initial states */ - ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data); - if (ret == -1) { - ret = -errno; - fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE " - "VALUES IOCTL (%d)\n", + values.mask = 0; + values.bits = 0; + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&values.mask, i); + ret = gpiotools_get_values(lfd, &values); + if (ret < 0) { + fprintf(stderr, + "Failed to issue GPIO LINE GET VALUES IOCTL (%d)\n", ret); - goto exit_close_error; + goto exit_line_close; } - fprintf(stdout, "Monitoring line %d on %s\n", line, device_name); - fprintf(stdout, "Initial line value: %d\n", data.values[0]); + if (num_lines == 1) { + fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name); + fprintf(stdout, "Initial line value: %d\n", + gpiotools_test_bit(values.bits, 0)); + } else { + fprintf(stdout, "Monitoring lines %d", lines[0]); + for (i = 1; i < num_lines - 1; i++) + fprintf(stdout, ", %d", lines[i]); + fprintf(stdout, " and %d on %s\n", lines[i], device_name); + fprintf(stdout, "Initial line values: %d", + gpiotools_test_bit(values.bits, 0)); + for (i = 1; i < num_lines - 1; i++) + fprintf(stdout, ", %d", + gpiotools_test_bit(values.bits, i)); + fprintf(stdout, " and %d\n", + gpiotools_test_bit(values.bits, i)); + } while (1) { - struct gpioevent_data event; + struct gpio_v2_line_event event; - ret = read(req.fd, &event, sizeof(event)); + ret = read(lfd, &event, sizeof(event)); if (ret == -1) { if (errno == -EAGAIN) { fprintf(stderr, "nothing available\n"); @@ -96,12 +107,14 @@ int monitor_device(const char *device_name, ret = -EIO; break; } - fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp); + fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ", + event.timestamp_ns, event.offset, event.line_seqno, + event.seqno); switch (event.id) { - case GPIOEVENT_EVENT_RISING_EDGE: + case GPIO_V2_LINE_EVENT_RISING_EDGE: fprintf(stdout, "rising edge"); break; - case GPIOEVENT_EVENT_FALLING_EDGE: + case GPIO_V2_LINE_EVENT_FALLING_EDGE: fprintf(stdout, "falling edge"); break; default: @@ -114,8 +127,11 @@ int monitor_device(const char *device_name, break; } -exit_close_error: - if (close(fd) == -1) +exit_line_close: + if (close(lfd) == -1) + perror("Failed to close line file"); +exit_device_close: + if (close(cfd) == -1) perror("Failed to close GPIO character device file"); exit_free_name: free(chrdev_name); @@ -127,29 +143,37 @@ void print_usage(void) fprintf(stderr, "Usage: gpio-event-mon [options]...\n" "Listen to events on GPIO lines, 0->1 1->0\n" " -n <name> Listen on GPIOs on a named device (must be stated)\n" - " -o <n> Offset to monitor\n" + " -o <n> Offset of line to monitor (may be repeated)\n" " -d Set line as open drain\n" " -s Set line as open source\n" " -r Listen for rising edges\n" " -f Listen for falling edges\n" + " -b <n> Debounce the line with period n microseconds\n" " [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n" " -? This helptext\n" "\n" "Example:\n" - "gpio-event-mon -n gpiochip0 -o 4 -r -f\n" + "gpio-event-mon -n gpiochip0 -o 4 -r -f -b 10000\n" ); } +#define EDGE_FLAGS \ + (GPIO_V2_LINE_FLAG_EDGE_RISING | \ + GPIO_V2_LINE_FLAG_EDGE_FALLING) + int main(int argc, char **argv) { const char *device_name = NULL; - unsigned int line = -1; + unsigned int lines[GPIO_V2_LINES_MAX]; + unsigned int num_lines = 0; unsigned int loops = 0; - uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT; - uint32_t eventflags = 0; - int c; + struct gpio_v2_line_config config; + int c, attr, i; + unsigned long debounce_period_us = 0; - while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) { + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_INPUT; + while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) { switch (c) { case 'c': loops = strtoul(optarg, NULL, 10); @@ -158,19 +182,27 @@ int main(int argc, char **argv) device_name = optarg; break; case 'o': - line = strtoul(optarg, NULL, 10); + if (num_lines >= GPIO_V2_LINES_MAX) { + print_usage(); + return -1; + } + lines[num_lines] = strtoul(optarg, NULL, 10); + num_lines++; + break; + case 'b': + debounce_period_us = strtoul(optarg, NULL, 10); break; case 'd': - handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN; + config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN; break; case 's': - handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE; + config.flags |= GPIO_V2_LINE_FLAG_OPEN_SOURCE; break; case 'r': - eventflags |= GPIOEVENT_REQUEST_RISING_EDGE; + config.flags |= GPIO_V2_LINE_FLAG_EDGE_RISING; break; case 'f': - eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE; + config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING; break; case '?': print_usage(); @@ -178,15 +210,23 @@ int main(int argc, char **argv) } } - if (!device_name || line == -1) { + if (debounce_period_us) { + attr = config.num_attrs; + config.num_attrs++; + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&config.attrs[attr].mask, i); + config.attrs[attr].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE; + config.attrs[attr].attr.debounce_period_us = debounce_period_us; + } + + if (!device_name || num_lines == 0) { print_usage(); return -1; } - if (!eventflags) { + if (!(config.flags & EDGE_FLAGS)) { printf("No flags specified, listening on both rising and " "falling edges\n"); - eventflags = GPIOEVENT_REQUEST_BOTH_EDGES; + config.flags |= EDGE_FLAGS; } - return monitor_device(device_name, line, handleflags, - eventflags, loops); + return monitor_device(device_name, lines, num_lines, &config, loops); } diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c index 9fd926e8cb52..54fdf59dd320 100644 --- a/tools/gpio/gpio-hammer.c +++ b/tools/gpio/gpio-hammer.c @@ -22,39 +22,46 @@ #include <linux/gpio.h> #include "gpio-utils.h" -int hammer_device(const char *device_name, unsigned int *lines, int nlines, +int hammer_device(const char *device_name, unsigned int *lines, int num_lines, unsigned int loops) { - struct gpiohandle_data data; + struct gpio_v2_line_values values; + struct gpio_v2_line_config config; char swirr[] = "-\\|/"; int fd; int ret; int i, j; unsigned int iteration = 0; - memset(&data.values, 0, sizeof(data.values)); - ret = gpiotools_request_linehandle(device_name, lines, nlines, - GPIOHANDLE_REQUEST_OUTPUT, &data, - "gpio-hammer"); + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_OUTPUT; + + ret = gpiotools_request_line(device_name, lines, num_lines, + &config, "gpio-hammer"); if (ret < 0) goto exit_error; else fd = ret; - ret = gpiotools_get_values(fd, &data); + values.mask = 0; + values.bits = 0; + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&values.mask, i); + + ret = gpiotools_get_values(fd, &values); if (ret < 0) goto exit_close_error; fprintf(stdout, "Hammer lines ["); - for (i = 0; i < nlines; i++) { + for (i = 0; i < num_lines; i++) { fprintf(stdout, "%d", lines[i]); - if (i != (nlines - 1)) + if (i != (num_lines - 1)) fprintf(stdout, ", "); } fprintf(stdout, "] on %s, initial states: [", device_name); - for (i = 0; i < nlines; i++) { - fprintf(stdout, "%d", data.values[i]); - if (i != (nlines - 1)) + for (i = 0; i < num_lines; i++) { + fprintf(stdout, "%d", gpiotools_test_bit(values.bits, i)); + if (i != (num_lines - 1)) fprintf(stdout, ", "); } fprintf(stdout, "]\n"); @@ -63,15 +70,15 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, j = 0; while (1) { /* Invert all lines so we blink */ - for (i = 0; i < nlines; i++) - data.values[i] = !data.values[i]; + for (i = 0; i < num_lines; i++) + gpiotools_change_bit(&values.bits, i); - ret = gpiotools_set_values(fd, &data); + ret = gpiotools_set_values(fd, &values); if (ret < 0) goto exit_close_error; /* Re-read values to get status */ - ret = gpiotools_get_values(fd, &data); + ret = gpiotools_get_values(fd, &values); if (ret < 0) goto exit_close_error; @@ -81,9 +88,10 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, j = 0; fprintf(stdout, "["); - for (i = 0; i < nlines; i++) { - fprintf(stdout, "%d: %d", lines[i], data.values[i]); - if (i != (nlines - 1)) + for (i = 0; i < num_lines; i++) { + fprintf(stdout, "%d: %d", lines[i], + gpiotools_test_bit(values.bits, i)); + if (i != (num_lines - 1)) fprintf(stdout, ", "); } fprintf(stdout, "]\r"); @@ -97,7 +105,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, ret = 0; exit_close_error: - gpiotools_release_linehandle(fd); + gpiotools_release_line(fd); exit_error: return ret; } @@ -121,7 +129,7 @@ int main(int argc, char **argv) const char *device_name = NULL; unsigned int lines[GPIOHANDLES_MAX]; unsigned int loops = 0; - int nlines; + int num_lines; int c; int i; @@ -158,11 +166,11 @@ int main(int argc, char **argv) return -1; } - nlines = i; + num_lines = i; - if (!device_name || !nlines) { + if (!device_name || !num_lines) { print_usage(); return -1; } - return hammer_device(device_name, lines, nlines, loops); + return hammer_device(device_name, lines, num_lines, loops); } diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c index 16a5d9cb9da2..37187e056c8b 100644 --- a/tools/gpio/gpio-utils.c +++ b/tools/gpio/gpio-utils.c @@ -38,7 +38,7 @@ * such as "gpiochip0" * @lines: An array desired lines, specified by offset * index for the associated GPIO device. - * @nline: The number of lines to request. + * @num_lines: The number of lines to request. * @flag: The new flag for requsted gpio. Reference * "linux/gpio.h" for the meaning of flag. * @data: Default value will be set to gpio when flag is @@ -56,7 +56,7 @@ * On failure return the errno. */ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, - unsigned int nlines, unsigned int flag, + unsigned int num_lines, unsigned int flag, struct gpiohandle_data *data, const char *consumer_label) { @@ -78,12 +78,12 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, goto exit_free_name; } - for (i = 0; i < nlines; i++) + for (i = 0; i < num_lines; i++) req.lineoffsets[i] = lines[i]; req.flags = flag; strcpy(req.consumer_label, consumer_label); - req.lines = nlines; + req.lines = num_lines; if (flag & GPIOHANDLE_REQUEST_OUTPUT) memcpy(req.default_values, data, sizeof(req.default_values)); @@ -100,20 +100,87 @@ exit_free_name: free(chrdev_name); return ret < 0 ? ret : req.fd; } + +/** + * gpiotools_request_line() - request gpio lines in a gpiochip + * @device_name: The name of gpiochip without prefix "/dev/", + * such as "gpiochip0" + * @lines: An array desired lines, specified by offset + * index for the associated GPIO device. + * @num_lines: The number of lines to request. + * @config: The new config for requested gpio. Reference + * "linux/gpio.h" for config details. + * @consumer: The name of consumer, such as "sysfs", + * "powerkey". This is useful for other users to + * know who is using. + * + * Request gpio lines through the ioctl provided by chardev. User + * could call gpiotools_set_values() and gpiotools_get_values() to + * read and write respectively through the returned fd. Call + * gpiotools_release_line() to release these lines after that. + * + * Return: On success return the fd; + * On failure return the errno. + */ +int gpiotools_request_line(const char *device_name, unsigned int *lines, + unsigned int num_lines, + struct gpio_v2_line_config *config, + const char *consumer) +{ + struct gpio_v2_line_request req; + char *chrdev_name; + int fd; + int i; + int ret; + + ret = asprintf(&chrdev_name, "/dev/%s", device_name); + if (ret < 0) + return -ENOMEM; + + fd = open(chrdev_name, 0); + if (fd == -1) { + ret = -errno; + fprintf(stderr, "Failed to open %s, %s\n", + chrdev_name, strerror(errno)); + goto exit_free_name; + } + + memset(&req, 0, sizeof(req)); + for (i = 0; i < num_lines; i++) + req.offsets[i] = lines[i]; + + req.config = *config; + strcpy(req.consumer, consumer); + req.num_lines = num_lines; + + ret = ioctl(fd, GPIO_V2_GET_LINE_IOCTL, &req); + if (ret == -1) { + ret = -errno; + fprintf(stderr, "Failed to issue %s (%d), %s\n", + "GPIO_GET_LINE_IOCTL", ret, strerror(errno)); + } + + if (close(fd) == -1) + perror("Failed to close GPIO character device file"); +exit_free_name: + free(chrdev_name); + return ret < 0 ? ret : req.fd; +} + /** * gpiotools_set_values(): Set the value of gpio(s) * @fd: The fd returned by - * gpiotools_request_linehandle(). - * @data: The array of values want to set. + * gpiotools_request_line(). + * @values: The array of values want to set. * * Return: On success return 0; * On failure return the errno. */ -int gpiotools_set_values(const int fd, struct gpiohandle_data *data) +int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values) { int ret; - ret = ioctl(fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, data); + ret = ioctl(fd, GPIO_V2_LINE_SET_VALUES_IOCTL, values); if (ret == -1) { ret = -errno; fprintf(stderr, "Failed to issue %s (%d), %s\n", @@ -127,17 +194,17 @@ int gpiotools_set_values(const int fd, struct gpiohandle_data *data) /** * gpiotools_get_values(): Get the value of gpio(s) * @fd: The fd returned by - * gpiotools_request_linehandle(). - * @data: The array of values get from hardware. + * gpiotools_request_line(). + * @values: The array of values get from hardware. * * Return: On success return 0; * On failure return the errno. */ -int gpiotools_get_values(const int fd, struct gpiohandle_data *data) +int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values) { int ret; - ret = ioctl(fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, data); + ret = ioctl(fd, GPIO_V2_LINE_GET_VALUES_IOCTL, values); if (ret == -1) { ret = -errno; fprintf(stderr, "Failed to issue %s (%d), %s\n", @@ -170,6 +237,27 @@ int gpiotools_release_linehandle(const int fd) } /** + * gpiotools_release_line(): Release the line(s) of gpiochip + * @fd: The fd returned by + * gpiotools_request_line(). + * + * Return: On success return 0; + * On failure return the errno. + */ +int gpiotools_release_line(const int fd) +{ + int ret; + + ret = close(fd); + if (ret == -1) { + perror("Failed to close GPIO LINE device file"); + ret = -errno; + } + + return ret; +} + +/** * gpiotools_get(): Get value from specific line * @device_name: The name of gpiochip without prefix "/dev/", * such as "gpiochip0" @@ -180,11 +268,14 @@ int gpiotools_release_linehandle(const int fd) */ int gpiotools_get(const char *device_name, unsigned int line) { - struct gpiohandle_data data; + int ret; + unsigned int value; unsigned int lines[] = {line}; - gpiotools_gets(device_name, lines, 1, &data); - return data.values[0]; + ret = gpiotools_gets(device_name, lines, 1, &value); + if (ret) + return ret; + return value; } @@ -194,28 +285,36 @@ int gpiotools_get(const char *device_name, unsigned int line) * such as "gpiochip0". * @lines: An array desired lines, specified by offset * index for the associated GPIO device. - * @nline: The number of lines to request. - * @data: The array of values get from gpiochip. + * @num_lines: The number of lines to request. + * @values: The array of values get from gpiochip. * * Return: On success return 0; * On failure return the errno. */ int gpiotools_gets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data) + unsigned int num_lines, unsigned int *values) { - int fd; + int fd, i; int ret; int ret_close; + struct gpio_v2_line_config config; + struct gpio_v2_line_values lv; - ret = gpiotools_request_linehandle(device_name, lines, nlines, - GPIOHANDLE_REQUEST_INPUT, data, - CONSUMER); + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_INPUT; + ret = gpiotools_request_line(device_name, lines, num_lines, + &config, CONSUMER); if (ret < 0) return ret; fd = ret; - ret = gpiotools_get_values(fd, data); - ret_close = gpiotools_release_linehandle(fd); + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&lv.mask, i); + ret = gpiotools_get_values(fd, &lv); + if (!ret) + for (i = 0; i < num_lines; i++) + values[i] = gpiotools_test_bit(lv.bits, i); + ret_close = gpiotools_release_line(fd); return ret < 0 ? ret : ret_close; } @@ -232,11 +331,9 @@ int gpiotools_gets(const char *device_name, unsigned int *lines, int gpiotools_set(const char *device_name, unsigned int line, unsigned int value) { - struct gpiohandle_data data; unsigned int lines[] = {line}; - data.values[0] = value; - return gpiotools_sets(device_name, lines, 1, &data); + return gpiotools_sets(device_name, lines, 1, &value); } /** @@ -245,23 +342,32 @@ int gpiotools_set(const char *device_name, unsigned int line, * such as "gpiochip0". * @lines: An array desired lines, specified by offset * index for the associated GPIO device. - * @nline: The number of lines to request. - * @data: The array of values set to gpiochip, must be + * @num_lines: The number of lines to request. + * @value: The array of values set to gpiochip, must be * 0(low) or 1(high). * * Return: On success return 0; * On failure return the errno. */ int gpiotools_sets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data) + unsigned int num_lines, unsigned int *values) { - int ret; + int ret, i; + struct gpio_v2_line_config config; - ret = gpiotools_request_linehandle(device_name, lines, nlines, - GPIOHANDLE_REQUEST_OUTPUT, data, - CONSUMER); + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_OUTPUT; + config.num_attrs = 1; + config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES; + for (i = 0; i < num_lines; i++) { + gpiotools_set_bit(&config.attrs[0].mask, i); + gpiotools_assign_bit(&config.attrs[0].attr.values, + i, values[i]); + } + ret = gpiotools_request_line(device_name, lines, num_lines, + &config, CONSUMER); if (ret < 0) return ret; - return gpiotools_release_linehandle(ret); + return gpiotools_release_line(ret); } diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h index cf37f13f3dcb..6c69a9f1c253 100644 --- a/tools/gpio/gpio-utils.h +++ b/tools/gpio/gpio-utils.h @@ -12,7 +12,9 @@ #ifndef _GPIO_UTILS_H_ #define _GPIO_UTILS_H_ +#include <stdbool.h> #include <string.h> +#include <linux/types.h> #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -23,19 +25,55 @@ static inline int check_prefix(const char *str, const char *prefix) } int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, - unsigned int nlines, unsigned int flag, + unsigned int num_lines, unsigned int flag, struct gpiohandle_data *data, const char *consumer_label); -int gpiotools_set_values(const int fd, struct gpiohandle_data *data); -int gpiotools_get_values(const int fd, struct gpiohandle_data *data); int gpiotools_release_linehandle(const int fd); +int gpiotools_request_line(const char *device_name, + unsigned int *lines, + unsigned int num_lines, + struct gpio_v2_line_config *config, + const char *consumer); +int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values); +int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values); +int gpiotools_release_line(const int fd); + int gpiotools_get(const char *device_name, unsigned int line); int gpiotools_gets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data); + unsigned int num_lines, unsigned int *values); int gpiotools_set(const char *device_name, unsigned int line, unsigned int value); int gpiotools_sets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data); + unsigned int num_lines, unsigned int *values); + +/* helper functions for gpio_v2_line_values bits */ +static inline void gpiotools_set_bit(__u64 *b, int n) +{ + *b |= _BITULL(n); +} + +static inline void gpiotools_change_bit(__u64 *b, int n) +{ + *b ^= _BITULL(n); +} + +static inline void gpiotools_clear_bit(__u64 *b, int n) +{ + *b &= ~_BITULL(n); +} + +static inline int gpiotools_test_bit(__u64 b, int n) +{ + return !!(b & _BITULL(n)); +} + +static inline void gpiotools_assign_bit(__u64 *b, int n, bool value) +{ + if (value) + gpiotools_set_bit(b, n); + else + gpiotools_clear_bit(b, n); +} #endif /* _GPIO_UTILS_H_ */ diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c index 5cea24fddfa7..f229ec62301b 100644 --- a/tools/gpio/gpio-watch.c +++ b/tools/gpio/gpio-watch.c @@ -21,8 +21,8 @@ int main(int argc, char **argv) { - struct gpioline_info_changed chg; - struct gpioline_info req; + struct gpio_v2_line_info_changed chg; + struct gpio_v2_line_info req; struct pollfd pfd; int fd, i, j, ret; char *event, *end; @@ -40,11 +40,11 @@ int main(int argc, char **argv) for (i = 0, j = 2; i < argc - 2; i++, j++) { memset(&req, 0, sizeof(req)); - req.line_offset = strtoul(argv[j], &end, 0); + req.offset = strtoul(argv[j], &end, 0); if (*end != '\0') goto err_usage; - ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req); + ret = ioctl(fd, GPIO_V2_GET_LINEINFO_WATCH_IOCTL, &req); if (ret) { perror("unable to set up line watch"); return EXIT_FAILURE; @@ -71,13 +71,13 @@ int main(int argc, char **argv) } switch (chg.event_type) { - case GPIOLINE_CHANGED_REQUESTED: + case GPIO_V2_LINE_CHANGED_REQUESTED: event = "requested"; break; - case GPIOLINE_CHANGED_RELEASED: + case GPIO_V2_LINE_CHANGED_RELEASED: event = "released"; break; - case GPIOLINE_CHANGED_CONFIG: + case GPIO_V2_LINE_CHANGED_CONFIG: event = "config changed"; break; default: @@ -87,7 +87,7 @@ int main(int argc, char **argv) } printf("line %u: %s at %llu\n", - chg.info.line_offset, event, chg.timestamp); + chg.info.offset, event, chg.timestamp_ns); } } diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c index b08d7a5e779b..5a05a454d0c9 100644 --- a/tools/gpio/lsgpio.c +++ b/tools/gpio/lsgpio.c @@ -25,57 +25,73 @@ struct gpio_flag { char *name; - unsigned long mask; + unsigned long long mask; }; struct gpio_flag flagnames[] = { { - .name = "kernel", - .mask = GPIOLINE_FLAG_KERNEL, + .name = "used", + .mask = GPIO_V2_LINE_FLAG_USED, + }, + { + .name = "input", + .mask = GPIO_V2_LINE_FLAG_INPUT, }, { .name = "output", - .mask = GPIOLINE_FLAG_IS_OUT, + .mask = GPIO_V2_LINE_FLAG_OUTPUT, }, { .name = "active-low", - .mask = GPIOLINE_FLAG_ACTIVE_LOW, + .mask = GPIO_V2_LINE_FLAG_ACTIVE_LOW, }, { .name = "open-drain", - .mask = GPIOLINE_FLAG_OPEN_DRAIN, + .mask = GPIO_V2_LINE_FLAG_OPEN_DRAIN, }, { .name = "open-source", - .mask = GPIOLINE_FLAG_OPEN_SOURCE, + .mask = GPIO_V2_LINE_FLAG_OPEN_SOURCE, }, { .name = "pull-up", - .mask = GPIOLINE_FLAG_BIAS_PULL_UP, + .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_UP, }, { .name = "pull-down", - .mask = GPIOLINE_FLAG_BIAS_PULL_DOWN, + .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN, }, { .name = "bias-disabled", - .mask = GPIOLINE_FLAG_BIAS_DISABLE, + .mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED, }, }; -void print_flags(unsigned long flags) +static void print_attributes(struct gpio_v2_line_info *info) { int i; - int printed = 0; + const char *field_format = "%s"; for (i = 0; i < ARRAY_SIZE(flagnames); i++) { - if (flags & flagnames[i].mask) { - if (printed) - fprintf(stdout, " "); - fprintf(stdout, "%s", flagnames[i].name); - printed++; + if (info->flags & flagnames[i].mask) { + fprintf(stdout, field_format, flagnames[i].name); + field_format = ", %s"; } } + + if ((info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) && + (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING)) + fprintf(stdout, field_format, "both-edges"); + else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) + fprintf(stdout, field_format, "rising-edge"); + else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING) + fprintf(stdout, field_format, "falling-edge"); + + for (i = 0; i < info->num_attrs; i++) { + if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE) + fprintf(stdout, ", debounce_period=%dusec", + info->attrs[0].debounce_period_us); + } } int list_device(const char *device_name) @@ -109,18 +125,18 @@ int list_device(const char *device_name) /* Loop over the lines and print info */ for (i = 0; i < cinfo.lines; i++) { - struct gpioline_info linfo; + struct gpio_v2_line_info linfo; memset(&linfo, 0, sizeof(linfo)); - linfo.line_offset = i; + linfo.offset = i; - ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo); + ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &linfo); if (ret == -1) { ret = -errno; perror("Failed to issue LINEINFO IOCTL\n"); goto exit_close_error; } - fprintf(stdout, "\tline %2d:", linfo.line_offset); + fprintf(stdout, "\tline %2d:", linfo.offset); if (linfo.name[0]) fprintf(stdout, " \"%s\"", linfo.name); else @@ -131,7 +147,7 @@ int list_device(const char *device_name) fprintf(stdout, " unused"); if (linfo.flags) { fprintf(stdout, " ["); - print_flags(linfo.flags); + print_attributes(&linfo); fprintf(stdout, "]"); } fprintf(stdout, "\n"); diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h new file mode 100644 index 000000000000..89135bb35bf7 --- /dev/null +++ b/tools/include/linux/static_call_types.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _STATIC_CALL_TYPES_H +#define _STATIC_CALL_TYPES_H + +#include <linux/types.h> +#include <linux/stringify.h> + +#define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) +#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) +#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) + +#define STATIC_CALL_TRAMP_PREFIX __SCT__ +#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1) +#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) +#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) + +/* + * Flags in the low bits of static_call_site::key. + */ +#define STATIC_CALL_SITE_TAIL 1UL /* tail call */ +#define STATIC_CALL_SITE_INIT 2UL /* init section */ +#define STATIC_CALL_SITE_FLAGS 3UL + +/* + * The static call site table needs to be created by external tooling (objtool + * or a compiler plugin). + */ +struct static_call_site { + s32 addr; + s32 key; +}; + +#endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 995b36c2ea7d..f2b5d72a46c2 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat) #define __NR_umount2 39 __SYSCALL(__NR_umount2, sys_umount) #define __NR_mount 40 -__SC_COMP(__NR_mount, sys_mount, compat_sys_mount) +__SYSCALL(__NR_mount, sys_mount) #define __NR_pivot_root 41 __SYSCALL(__NR_pivot_root, sys_pivot_root) @@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read) #define __NR_write 64 __SYSCALL(__NR_write, sys_write) #define __NR_readv 65 -__SC_COMP(__NR_readv, sys_readv, compat_sys_readv) +__SC_COMP(__NR_readv, sys_readv, sys_readv) #define __NR_writev 66 -__SC_COMP(__NR_writev, sys_writev, compat_sys_writev) +__SC_COMP(__NR_writev, sys_writev, sys_writev) #define __NR_pread64 67 __SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64) #define __NR_pwrite64 68 @@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4) /* fs/splice.c */ #define __NR_vmsplice 75 -__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice) +__SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_splice 76 __SYSCALL(__NR_splice, sys_splice) #define __NR_tee 77 @@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns) #define __NR_sendmmsg 269 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg) #define __NR_process_vm_readv 270 -__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \ - compat_sys_process_vm_readv) +__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) #define __NR_process_vm_writev 271 -__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ - compat_sys_process_vm_writev) +__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) #define __NR_kcmp 272 __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt index 33ba98d72b16..99d00870b160 100644 --- a/tools/memory-model/Documentation/cheatsheet.txt +++ b/tools/memory-model/Documentation/cheatsheet.txt @@ -3,9 +3,9 @@ C Self R W RMW Self R W DR DW RMW SV -- ---- - - --- ---- - - -- -- --- -- -Store, e.g., WRITE_ONCE() Y Y -Load, e.g., READ_ONCE() Y Y Y Y -Unsuccessful RMW operation Y Y Y Y +Relaxed store Y Y +Relaxed load Y Y Y Y +Relaxed RMW operation Y Y Y Y rcu_dereference() Y Y Y Y Successful *_acquire() R Y Y Y Y Y Y Successful *_release() C Y Y Y W Y @@ -17,14 +17,19 @@ smp_mb__before_atomic() CP Y Y Y a a a a Y smp_mb__after_atomic() CP a a Y Y Y Y Y Y -Key: C: Ordering is cumulative - P: Ordering propagates - R: Read, for example, READ_ONCE(), or read portion of RMW - W: Write, for example, WRITE_ONCE(), or write portion of RMW - Y: Provides ordering - a: Provides ordering given intervening RMW atomic operation - DR: Dependent read (address dependency) - DW: Dependent write (address, data, or control dependency) - RMW: Atomic read-modify-write operation - SELF: Orders self, as opposed to accesses before and/or after - SV: Orders later accesses to the same variable +Key: Relaxed: A relaxed operation is either READ_ONCE(), WRITE_ONCE(), + a *_relaxed() RMW operation, an unsuccessful RMW + operation, a non-value-returning RMW operation such + as atomic_inc(), or one of the atomic*_read() and + atomic*_set() family of operations. + C: Ordering is cumulative + P: Ordering propagates + R: Read, for example, READ_ONCE(), or read portion of RMW + W: Write, for example, WRITE_ONCE(), or write portion of RMW + Y: Provides ordering + a: Provides ordering given intervening RMW atomic operation + DR: Dependent read (address dependency) + DW: Dependent write (address, data, or control dependency) + RMW: Atomic read-modify-write operation + SELF: Orders self, as opposed to accesses before and/or after + SV: Orders later accesses to the same variable diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt new file mode 100644 index 000000000000..2f840dcd15cf --- /dev/null +++ b/tools/memory-model/Documentation/litmus-tests.txt @@ -0,0 +1,1074 @@ +Linux-Kernel Memory Model Litmus Tests +====================================== + +This file describes the LKMM litmus-test format by example, describes +some tricks and traps, and finally outlines LKMM's limitations. Earlier +versions of this material appeared in a number of LWN articles, including: + +https://lwn.net/Articles/720550/ + A formal kernel memory-ordering model (part 2) +https://lwn.net/Articles/608550/ + Axiomatic validation of memory barriers and atomic instructions +https://lwn.net/Articles/470681/ + Validating Memory Barriers and Atomic Instructions + +This document presents information in decreasing order of applicability, +so that, where possible, the information that has proven more commonly +useful is shown near the beginning. + +For information on installing LKMM, including the underlying "herd7" +tool, please see tools/memory-model/README. + + +Copy-Pasta +========== + +As with other software, it is often better (if less macho) to adapt an +existing litmus test than it is to create one from scratch. A number +of litmus tests may be found in the kernel source tree: + + tools/memory-model/litmus-tests/ + Documentation/litmus-tests/ + +Several thousand more example litmus tests are available on github +and kernel.org: + + https://github.com/paulmckrcu/litmus + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus + +The -l and -L arguments to "git grep" can be quite helpful in identifying +existing litmus tests that are similar to the one you need. But even if +you start with an existing litmus test, it is still helpful to have a +good understanding of the litmus-test format. + + +Examples and Format +=================== + +This section describes the overall format of litmus tests, starting +with a small example of the message-passing pattern and moving on to +more complex examples that illustrate explicit initialization and LKMM's +minimalistic set of flow-control statements. + + +Message-Passing Example +----------------------- + +This section gives an overview of the format of a litmus test using an +example based on the common message-passing use case. This use case +appears often in the Linux kernel. For example, a flag (modeled by "y" +below) indicates that a buffer (modeled by "x" below) is now completely +filled in and ready for use. It would be very bad if the consumer saw the +flag set, but, due to memory misordering, saw old values in the buffer. + +This example asks whether smp_store_release() and smp_load_acquire() +suffices to avoid this bad outcome: + + 1 C MP+pooncerelease+poacquireonce + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 WRITE_ONCE(*x, 1); + 8 smp_store_release(y, 1); + 9 } +10 +11 P1(int *x, int *y) +12 { +13 int r0; +14 int r1; +15 +16 r0 = smp_load_acquire(y); +17 r1 = READ_ONCE(*x); +18 } +19 +20 exists (1:r0=1 /\ 1:r1=0) + +Line 1 starts with "C", which identifies this file as being in the +LKMM C-language format (which, as we will see, is a small fragment +of the full C language). The remainder of line 1 is the name of +the test, which by convention is the filename with the ".litmus" +suffix stripped. In this case, the actual test may be found in +tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus +in the Linux-kernel source tree. + +Mechanically generated litmus tests will often have an optional +double-quoted comment string on the second line. Such strings are ignored +when running the test. Yes, you can add your own comments to litmus +tests, but this is a bit involved due to the use of multiple parsers. +For now, you can use C-language comments in the C code, and these comments +may be in either the "/* */" or the "//" style. A later section will +cover the full litmus-test commenting story. + +Line 3 is the initialization section. Because the default initialization +to zero suffices for this test, the "{}" syntax is used, which mean the +initialization section is empty. Litmus tests requiring non-default +initialization must have non-empty initialization sections, as in the +example that will be presented later in this document. + +Lines 5-9 show the first process and lines 11-18 the second process. Each +process corresponds to a Linux-kernel task (or kthread, workqueue, thread, +and so on; LKMM discussions often use these terms interchangeably). +The name of the first process is "P0" and that of the second "P1". +You can name your processes anything you like as long as the names consist +of a single "P" followed by a number, and as long as the numbers are +consecutive starting with zero. This can actually be quite helpful, +for example, a .litmus file matching "^P1(" but not matching "^P2(" +must contain a two-process litmus test. + +The argument list for each function are pointers to the global variables +used by that function. Unlike normal C-language function parameters, the +names are significant. The fact that both P0() and P1() have a formal +parameter named "x" means that these two processes are working with the +same global variable, also named "x". So the "int *x, int *y" on P0() +and P1() mean that both processes are working with two shared global +variables, "x" and "y". Global variables are always passed to processes +by reference, hence "P0(int *x, int *y)", but *never* "P0(int x, int y)". + +P0() has no local variables, but P1() has two of them named "r0" and "r1". +These names may be freely chosen, but for historical reasons stemming from +other litmus-test formats, it is conventional to use names consisting of +"r" followed by a number as shown here. A common bug in litmus tests +is forgetting to add a global variable to a process's parameter list. +This will sometimes result in an error message, but can also cause the +intended global to instead be silently treated as an undeclared local +variable. + +Each process's code is similar to Linux-kernel C, as can be seen on lines +7-8 and 13-17. This code may use many of the Linux kernel's atomic +operations, some of its exclusive-lock functions, and some of its RCU +and SRCU functions. An approximate list of the currently supported +functions may be found in the linux-kernel.def file. + +The P0() process does "WRITE_ONCE(*x, 1)" on line 7. Because "x" is a +pointer in P0()'s parameter list, this does an unordered store to global +variable "x". Line 8 does "smp_store_release(y, 1)", and because "y" +is also in P0()'s parameter list, this does a release store to global +variable "y". + +The P1() process declares two local variables on lines 13 and 14. +Line 16 does "r0 = smp_load_acquire(y)" which does an acquire load +from global variable "y" into local variable "r0". Line 17 does a +"r1 = READ_ONCE(*x)", which does an unordered load from "*x" into local +variable "r1". Both "x" and "y" are in P1()'s parameter list, so both +reference the same global variables that are used by P0(). + +Line 20 is the "exists" assertion expression to evaluate the final state. +This final state is evaluated after the dust has settled: both processes +have completed and all of their memory references and memory barriers +have propagated to all parts of the system. The references to the local +variables "r0" and "r1" in line 24 must be prefixed with "1:" to specify +which process they are local to. + +Note that the assertion expression is written in the litmus-test +language rather than in C. For example, single "=" is an equality +operator rather than an assignment. The "/\" character combination means +"and". Similarly, "\/" stands for "or". Both of these are ASCII-art +representations of the corresponding mathematical symbols. Finally, +"~" stands for "logical not", which is "!" in C, and not to be confused +with the C-language "~" operator which instead stands for "bitwise not". +Parentheses may be used to override precedence. + +The "exists" assertion on line 20 is satisfied if the consumer sees the +flag ("y") set but the buffer ("x") as not yet filled in, that is, if P1() +loaded a value from "x" that was equal to 1 but loaded a value from "y" +that was still equal to zero. + +This example can be checked by running the following command, which +absolutely must be run from the tools/memory-model directory and from +this directory only: + +herd7 -conf linux-kernel.cfg litmus-tests/MP+pooncerelease+poacquireonce.litmus + +The output is the result of something similar to a full state-space +search, and is as follows: + + 1 Test MP+pooncerelease+poacquireonce Allowed + 2 States 3 + 3 1:r0=0; 1:r1=0; + 4 1:r0=0; 1:r1=1; + 5 1:r0=1; 1:r1=1; + 6 No + 7 Witnesses + 8 Positive: 0 Negative: 3 + 9 Condition exists (1:r0=1 /\ 1:r1=0) +10 Observation MP+pooncerelease+poacquireonce Never 0 3 +11 Time MP+pooncerelease+poacquireonce 0.00 +12 Hash=579aaa14d8c35a39429b02e698241d09 + +The most pertinent line is line 10, which contains "Never 0 3", which +indicates that the bad result flagged by the "exists" clause never +happens. This line might instead say "Sometimes" to indicate that the +bad result happened in some but not all executions, or it might say +"Always" to indicate that the bad result happened in all executions. +(The herd7 tool doesn't judge, so it is only an LKMM convention that the +"exists" clause indicates a bad result. To see this, invert the "exists" +clause's condition and run the test.) The numbers ("0 3") at the end +of this line indicate the number of end states satisfying the "exists" +clause (0) and the number not not satisfying that clause (3). + +Another important part of this output is shown in lines 2-5, repeated here: + + 2 States 3 + 3 1:r0=0; 1:r1=0; + 4 1:r0=0; 1:r1=1; + 5 1:r0=1; 1:r1=1; + +Line 2 gives the total number of end states, and each of lines 3-5 list +one of these states, with the first ("1:r0=0; 1:r1=0;") indicating that +both of P1()'s loads returned the value "0". As expected, given the +"Never" on line 10, the state flagged by the "exists" clause is not +listed. This full list of states can be helpful when debugging a new +litmus test. + +The rest of the output is not normally needed, either due to irrelevance +or due to being redundant with the lines discussed above. However, the +following paragraph lists them for the benefit of readers possessed of +an insatiable curiosity. Other readers should feel free to skip ahead. + +Line 1 echos the test name, along with the "Test" and "Allowed". Line 6's +"No" says that the "exists" clause was not satisfied by any execution, +and as such it has the same meaning as line 10's "Never". Line 7 is a +lead-in to line 8's "Positive: 0 Negative: 3", which lists the number +of end states satisfying and not satisfying the "exists" clause, just +like the two numbers at the end of line 10. Line 9 repeats the "exists" +clause so that you don't have to look it up in the litmus-test file. +The number at the end of line 11 (which begins with "Time") gives the +time in seconds required to analyze the litmus test. Small tests such +as this one complete in a few milliseconds, so "0.00" is quite common. +Line 12 gives a hash of the contents for the litmus-test file, and is used +by tooling that manages litmus tests and their output. This tooling is +used by people modifying LKMM itself, and among other things lets such +people know which of the several thousand relevant litmus tests were +affected by a given change to LKMM. + + +Initialization +-------------- + +The previous example relied on the default zero initialization for +"x" and "y", but a similar litmus test could instead initialize them +to some other value: + + 1 C MP+pooncerelease+poacquireonce + 2 + 3 { + 4 x=42; + 5 y=42; + 6 } + 7 + 8 P0(int *x, int *y) + 9 { +10 WRITE_ONCE(*x, 1); +11 smp_store_release(y, 1); +12 } +13 +14 P1(int *x, int *y) +15 { +16 int r0; +17 int r1; +18 +19 r0 = smp_load_acquire(y); +20 r1 = READ_ONCE(*x); +21 } +22 +23 exists (1:r0=1 /\ 1:r1=42) + +Lines 3-6 now initialize both "x" and "y" to the value 42. This also +means that the "exists" clause on line 23 must change "1:r1=0" to +"1:r1=42". + +Running the test gives the same overall result as before, but with the +value 42 appearing in place of the value zero: + + 1 Test MP+pooncerelease+poacquireonce Allowed + 2 States 3 + 3 1:r0=1; 1:r1=1; + 4 1:r0=42; 1:r1=1; + 5 1:r0=42; 1:r1=42; + 6 No + 7 Witnesses + 8 Positive: 0 Negative: 3 + 9 Condition exists (1:r0=1 /\ 1:r1=42) +10 Observation MP+pooncerelease+poacquireonce Never 0 3 +11 Time MP+pooncerelease+poacquireonce 0.02 +12 Hash=ab9a9b7940a75a792266be279a980156 + +It is tempting to avoid the open-coded repetitions of the value "42" +by defining another global variable "initval=42" and replacing all +occurrences of "42" with "initval". This will not, repeat *not*, +initialize "x" and "y" to 42, but instead to the address of "initval" +(try it!). See the section below on linked lists to learn more about +why this approach to initialization can be useful. + + +Control Structures +------------------ + +LKMM supports the C-language "if" statement, which allows modeling of +conditional branches. In LKMM, conditional branches can affect ordering, +but only if you are *very* careful (compilers are surprisingly able +to optimize away conditional branches). The following example shows +the "load buffering" (LB) use case that is used in the Linux kernel to +synchronize between ring-buffer producers and consumers. In the example +below, P0() is one side checking to see if an operation may proceed and +P1() is the other side completing its update. + + 1 C LB+fencembonceonce+ctrlonceonce + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 int r0; + 8 + 9 r0 = READ_ONCE(*x); +10 if (r0) +11 WRITE_ONCE(*y, 1); +12 } +13 +14 P1(int *x, int *y) +15 { +16 int r0; +17 +18 r0 = READ_ONCE(*y); +19 smp_mb(); +20 WRITE_ONCE(*x, 1); +21 } +22 +23 exists (0:r0=1 /\ 1:r0=1) + +P1()'s "if" statement on line 10 works as expected, so that line 11 is +executed only if line 9 loads a non-zero value from "x". Because P1()'s +write of "1" to "x" happens only after P1()'s read from "y", one would +hope that the "exists" clause cannot be satisfied. LKMM agrees: + + 1 Test LB+fencembonceonce+ctrlonceonce Allowed + 2 States 2 + 3 0:r0=0; 1:r0=0; + 4 0:r0=1; 1:r0=0; + 5 No + 6 Witnesses + 7 Positive: 0 Negative: 2 + 8 Condition exists (0:r0=1 /\ 1:r0=1) + 9 Observation LB+fencembonceonce+ctrlonceonce Never 0 2 +10 Time LB+fencembonceonce+ctrlonceonce 0.00 +11 Hash=e5260556f6de495fd39b556d1b831c3b + +However, there is no "while" statement due to the fact that full +state-space search has some difficulty with iteration. However, there +are tricks that may be used to handle some special cases, which are +discussed below. In addition, loop-unrolling tricks may be applied, +albeit sparingly. + + +Tricks and Traps +================ + +This section covers extracting debug output from herd7, emulating +spin loops, handling trivial linked lists, adding comments to litmus tests, +emulating call_rcu(), and finally tricks to improve herd7 performance +in order to better handle large litmus tests. + + +Debug Output +------------ + +By default, the herd7 state output includes all variables mentioned +in the "exists" clause. But sometimes debugging efforts are greatly +aided by the values of other variables. Consider this litmus test +(tools/memory-order/litmus-tests/SB+rfionceonce-poonceonces.litmus but +slightly modified), which probes an obscure corner of hardware memory +ordering: + + 1 C SB+rfionceonce-poonceonces + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 int r1; + 8 int r2; + 9 +10 WRITE_ONCE(*x, 1); +11 r1 = READ_ONCE(*x); +12 r2 = READ_ONCE(*y); +13 } +14 +15 P1(int *x, int *y) +16 { +17 int r3; +18 int r4; +19 +20 WRITE_ONCE(*y, 1); +21 r3 = READ_ONCE(*y); +22 r4 = READ_ONCE(*x); +23 } +24 +25 exists (0:r2=0 /\ 1:r4=0) + +The herd7 output is as follows: + + 1 Test SB+rfionceonce-poonceonces Allowed + 2 States 4 + 3 0:r2=0; 1:r4=0; + 4 0:r2=0; 1:r4=1; + 5 0:r2=1; 1:r4=0; + 6 0:r2=1; 1:r4=1; + 7 Ok + 8 Witnesses + 9 Positive: 1 Negative: 3 +10 Condition exists (0:r2=0 /\ 1:r4=0) +11 Observation SB+rfionceonce-poonceonces Sometimes 1 3 +12 Time SB+rfionceonce-poonceonces 0.01 +13 Hash=c7f30fe0faebb7d565405d55b7318ada + +(This output indicates that CPUs are permitted to "snoop their own +store buffers", which all of Linux's CPU families other than s390 will +happily do. Such snooping results in disagreement among CPUs on the +order of stores from different CPUs, which is rarely an issue.) + +But the herd7 output shows only the two variables mentioned in the +"exists" clause. Someone modifying this test might wish to know the +values of "x", "y", "0:r1", and "0:r3" as well. The "locations" +statement on line 25 shows how to cause herd7 to display additional +variables: + + 1 C SB+rfionceonce-poonceonces + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 int r1; + 8 int r2; + 9 +10 WRITE_ONCE(*x, 1); +11 r1 = READ_ONCE(*x); +12 r2 = READ_ONCE(*y); +13 } +14 +15 P1(int *x, int *y) +16 { +17 int r3; +18 int r4; +19 +20 WRITE_ONCE(*y, 1); +21 r3 = READ_ONCE(*y); +22 r4 = READ_ONCE(*x); +23 } +24 +25 locations [0:r1; 1:r3; x; y] +26 exists (0:r2=0 /\ 1:r4=0) + +The herd7 output then displays the values of all the variables: + + 1 Test SB+rfionceonce-poonceonces Allowed + 2 States 4 + 3 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=0; x=1; y=1; + 4 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=1; x=1; y=1; + 5 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=0; x=1; y=1; + 6 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=1; x=1; y=1; + 7 Ok + 8 Witnesses + 9 Positive: 1 Negative: 3 +10 Condition exists (0:r2=0 /\ 1:r4=0) +11 Observation SB+rfionceonce-poonceonces Sometimes 1 3 +12 Time SB+rfionceonce-poonceonces 0.01 +13 Hash=40de8418c4b395388f6501cafd1ed38d + +What if you would like to know the value of a particular global variable +at some particular point in a given process's execution? One approach +is to use a READ_ONCE() to load that global variable into a new local +variable, then add that local variable to the "locations" clause. +But be careful: In some litmus tests, adding a READ_ONCE() will change +the outcome! For one example, please see the C-READ_ONCE.litmus and +C-READ_ONCE-omitted.litmus tests located here: + + https://github.com/paulmckrcu/litmus/blob/master/manual/kernel/ + + +Spin Loops +---------- + +The analysis carried out by herd7 explores full state space, which is +at best of exponential time complexity. Adding processes and increasing +the amount of code in a give process can greatly increase execution time. +Potentially infinite loops, such as those used to wait for locks to +become available, are clearly problematic. + +Fortunately, it is possible to avoid state-space explosion by specially +modeling such loops. For example, the following litmus tests emulates +locking using xchg_acquire(), but instead of enclosing xchg_acquire() +in a spin loop, it instead excludes executions that fail to acquire the +lock using a herd7 "filter" clause. Note that for exclusive locking, you +are better off using the spin_lock() and spin_unlock() that LKMM directly +models, if for no other reason that these are much faster. However, the +techniques illustrated in this section can be used for other purposes, +such as emulating reader-writer locking, which LKMM does not yet model. + + 1 C C-SB+l-o-o-u+l-o-o-u-X + 2 + 3 { + 4 } + 5 + 6 P0(int *sl, int *x0, int *x1) + 7 { + 8 int r2; + 9 int r1; +10 +11 r2 = xchg_acquire(sl, 1); +12 WRITE_ONCE(*x0, 1); +13 r1 = READ_ONCE(*x1); +14 smp_store_release(sl, 0); +15 } +16 +17 P1(int *sl, int *x0, int *x1) +18 { +19 int r2; +20 int r1; +21 +22 r2 = xchg_acquire(sl, 1); +23 WRITE_ONCE(*x1, 1); +24 r1 = READ_ONCE(*x0); +25 smp_store_release(sl, 0); +26 } +27 +28 filter (0:r2=0 /\ 1:r2=0) +29 exists (0:r1=0 /\ 1:r1=0) + +This litmus test may be found here: + +https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd/C-SB+l-o-o-u+l-o-o-u-X.litmus + +This test uses two global variables, "x1" and "x2", and also emulates a +single global spinlock named "sl". This spinlock is held by whichever +process changes the value of "sl" from "0" to "1", and is released when +that process sets "sl" back to "0". P0()'s lock acquisition is emulated +on line 11 using xchg_acquire(), which unconditionally stores the value +"1" to "sl" and stores either "0" or "1" to "r2", depending on whether +the lock acquisition was successful or unsuccessful (due to "sl" already +having the value "1"), respectively. P1() operates in a similar manner. + +Rather unconventionally, execution appears to proceed to the critical +section on lines 12 and 13 in either case. Line 14 then uses an +smp_store_release() to store zero to "sl", thus emulating lock release. + +The case where xchg_acquire() fails to acquire the lock is handled by +the "filter" clause on line 28, which tells herd7 to keep only those +executions in which both "0:r2" and "1:r2" are zero, that is to pay +attention only to those executions in which both locks are actually +acquired. Thus, the bogus executions that would execute the critical +sections are discarded and any effects that they might have had are +ignored. Note well that the "filter" clause keeps those executions +for which its expression is satisfied, that is, for which the expression +evaluates to true. In other words, the "filter" clause says what to +keep, not what to discard. + +The result of running this test is as follows: + + 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed + 2 States 2 + 3 0:r1=0; 1:r1=1; + 4 0:r1=1; 1:r1=0; + 5 No + 6 Witnesses + 7 Positive: 0 Negative: 2 + 8 Condition exists (0:r1=0 /\ 1:r1=0) + 9 Observation C-SB+l-o-o-u+l-o-o-u-X Never 0 2 +10 Time C-SB+l-o-o-u+l-o-o-u-X 0.03 + +The "Never" on line 9 indicates that this use of xchg_acquire() and +smp_store_release() really does correctly emulate locking. + +Why doesn't the litmus test take the simpler approach of using a spin loop +to handle failed spinlock acquisitions, like the kernel does? The key +insight behind this litmus test is that spin loops have no effect on the +possible "exists"-clause outcomes of program execution in the absence +of deadlock. In other words, given a high-quality lock-acquisition +primitive in a deadlock-free program running on high-quality hardware, +each lock acquisition will eventually succeed. Because herd7 already +explores the full state space, the length of time required to actually +acquire the lock does not matter. After all, herd7 already models all +possible durations of the xchg_acquire() statements. + +Why not just add the "filter" clause to the "exists" clause, thus +avoiding the "filter" clause entirely? This does work, but is slower. +The reason that the "filter" clause is faster is that (in the common case) +herd7 knows to abandon an execution as soon as the "filter" expression +fails to be satisfied. In contrast, the "exists" clause is evaluated +only at the end of time, thus requiring herd7 to waste time on bogus +executions in which both critical sections proceed concurrently. In +addition, some LKMM users like the separation of concerns provided by +using the both the "filter" and "exists" clauses. + +Readers lacking a pathological interest in odd corner cases should feel +free to skip the remainder of this section. + +But what if the litmus test were to temporarily set "0:r2" to a non-zero +value? Wouldn't that cause herd7 to abandon the execution prematurely +due to an early mismatch of the "filter" clause? + +Why not just try it? Line 4 of the following modified litmus test +introduces a new global variable "x2" that is initialized to "1". Line 23 +of P1() reads that variable into "1:r2" to force an early mismatch with +the "filter" clause. Line 24 does a known-true "if" condition to avoid +and static analysis that herd7 might do. Finally the "exists" clause +on line 32 is updated to a condition that is alway satisfied at the end +of the test. + + 1 C C-SB+l-o-o-u+l-o-o-u-X + 2 + 3 { + 4 x2=1; + 5 } + 6 + 7 P0(int *sl, int *x0, int *x1) + 8 { + 9 int r2; +10 int r1; +11 +12 r2 = xchg_acquire(sl, 1); +13 WRITE_ONCE(*x0, 1); +14 r1 = READ_ONCE(*x1); +15 smp_store_release(sl, 0); +16 } +17 +18 P1(int *sl, int *x0, int *x1, int *x2) +19 { +20 int r2; +21 int r1; +22 +23 r2 = READ_ONCE(*x2); +24 if (r2) +25 r2 = xchg_acquire(sl, 1); +26 WRITE_ONCE(*x1, 1); +27 r1 = READ_ONCE(*x0); +28 smp_store_release(sl, 0); +29 } +30 +31 filter (0:r2=0 /\ 1:r2=0) +32 exists (x1=1) + +If the "filter" clause were to check each variable at each point in the +execution, running this litmus test would display no executions because +all executions would be filtered out at line 23. However, the output +is instead as follows: + + 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed + 2 States 1 + 3 x1=1; + 4 Ok + 5 Witnesses + 6 Positive: 2 Negative: 0 + 7 Condition exists (x1=1) + 8 Observation C-SB+l-o-o-u+l-o-o-u-X Always 2 0 + 9 Time C-SB+l-o-o-u+l-o-o-u-X 0.04 +10 Hash=080bc508da7f291e122c6de76c0088e3 + +Line 3 shows that there is one execution that did not get filtered out, +so the "filter" clause is evaluated only on the last assignment to +the variables that it checks. In this case, the "filter" clause is a +disjunction, so it might be evaluated twice, once at the final (and only) +assignment to "0:r2" and once at the final assignment to "1:r2". + + +Linked Lists +------------ + +LKMM can handle linked lists, but only linked lists in which each node +contains nothing except a pointer to the next node in the list. This is +of course quite restrictive, but there is nevertheless quite a bit that +can be done within these confines, as can be seen in the litmus test +at tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus: + + 1 C MP+onceassign+derefonce + 2 + 3 { + 4 y=z; + 5 z=0; + 6 } + 7 + 8 P0(int *x, int **y) + 9 { +10 WRITE_ONCE(*x, 1); +11 rcu_assign_pointer(*y, x); +12 } +13 +14 P1(int *x, int **y) +15 { +16 int *r0; +17 int r1; +18 +19 rcu_read_lock(); +20 r0 = rcu_dereference(*y); +21 r1 = READ_ONCE(*r0); +22 rcu_read_unlock(); +23 } +24 +25 exists (1:r0=x /\ 1:r1=0) + +Line 4's "y=z" may seem odd, given that "z" has not yet been initialized. +But "y=z" does not set the value of "y" to that of "z", but instead +sets the value of "y" to the *address* of "z". Lines 4 and 5 therefore +create a simple linked list, with "y" pointing to "z" and "z" having a +NULL pointer. A much longer linked list could be created if desired, +and circular singly linked lists can also be created and manipulated. + +The "exists" clause works the same way, with the "1:r0=x" comparing P1()'s +"r0" not to the value of "x", but again to its address. This term of the +"exists" clause therefore tests whether line 20's load from "y" saw the +value stored by line 11, which is in fact what is required in this case. + +P0()'s line 10 initializes "x" to the value 1 then line 11 links to "x" +from "y", replacing "z". + +P1()'s line 20 loads a pointer from "y", and line 21 dereferences that +pointer. The RCU read-side critical section spanning lines 19-22 is just +for show in this example. Note that the address used for line 21's load +depends on (in this case, "is exactly the same as") the value loaded by +line 20. This is an example of what is called an "address dependency". +This particular address dependency extends from the load on line 20 to the +load on line 21. Address dependencies provide a weak form of ordering. + +Running this test results in the following: + + 1 Test MP+onceassign+derefonce Allowed + 2 States 2 + 3 1:r0=x; 1:r1=1; + 4 1:r0=z; 1:r1=0; + 5 No + 6 Witnesses + 7 Positive: 0 Negative: 2 + 8 Condition exists (1:r0=x /\ 1:r1=0) + 9 Observation MP+onceassign+derefonce Never 0 2 +10 Time MP+onceassign+derefonce 0.00 +11 Hash=49ef7a741563570102448a256a0c8568 + +The only possible outcomes feature P1() loading a pointer to "z" +(which contains zero) on the one hand and P1() loading a pointer to "x" +(which contains the value one) on the other. This should be reassuring +because it says that RCU readers cannot see the old preinitialization +values when accessing a newly inserted list node. This undesirable +scenario is flagged by the "exists" clause, and would occur if P1() +loaded a pointer to "x", but obtained the pre-initialization value of +zero after dereferencing that pointer. + + +Comments +-------- + +Different portions of a litmus test are processed by different parsers, +which has the charming effect of requiring different comment syntax in +different portions of the litmus test. The C-syntax portions use +C-language comments (either "/* */" or "//"), while the other portions +use Ocaml comments "(* *)". + +The following litmus test illustrates the comment style corresponding +to each syntactic unit of the test: + + 1 C MP+onceassign+derefonce (* A *) + 2 + 3 (* B *) + 4 + 5 { + 6 y=z; (* C *) + 7 z=0; + 8 } // D + 9 +10 // E +11 +12 P0(int *x, int **y) // F +13 { +14 WRITE_ONCE(*x, 1); // G +15 rcu_assign_pointer(*y, x); +16 } +17 +18 // H +19 +20 P1(int *x, int **y) +21 { +22 int *r0; +23 int r1; +24 +25 rcu_read_lock(); +26 r0 = rcu_dereference(*y); +27 r1 = READ_ONCE(*r0); +28 rcu_read_unlock(); +29 } +30 +31 // I +32 +33 exists (* J *) (1:r0=x /\ (* K *) 1:r1=0) (* L *) + +In short, use C-language comments in the C code and Ocaml comments in +the rest of the litmus test. + +On the other hand, if you prefer C-style comments everywhere, the +C preprocessor is your friend. + + +Asynchronous RCU Grace Periods +------------------------------ + +The following litmus test is derived from the example show in +Documentation/litmus-tests/rcu/RCU+sync+free.litmus, but converted to +emulate call_rcu(): + + 1 C RCU+sync+free + 2 + 3 { + 4 int x = 1; + 5 int *y = &x; + 6 int z = 1; + 7 } + 8 + 9 P0(int *x, int *z, int **y) +10 { +11 int *r0; +12 int r1; +13 +14 rcu_read_lock(); +15 r0 = rcu_dereference(*y); +16 r1 = READ_ONCE(*r0); +17 rcu_read_unlock(); +18 } +19 +20 P1(int *z, int **y, int *c) +21 { +22 rcu_assign_pointer(*y, z); +23 smp_store_release(*c, 1); // Emulate call_rcu(). +24 } +25 +26 P2(int *x, int *z, int **y, int *c) +27 { +28 int r0; +29 +30 r0 = smp_load_acquire(*c); // Note call_rcu() request. +31 synchronize_rcu(); // Wait one grace period. +32 WRITE_ONCE(*x, 0); // Emulate the RCU callback. +33 } +34 +35 filter (2:r0=1) (* Reject too-early starts. *) +36 exists (0:r0=x /\ 0:r1=0) + +Lines 4-6 initialize a linked list headed by "y" that initially contains +"x". In addition, "z" is pre-initialized to prepare for P1(), which +will replace "x" with "z" in this list. + +P0() on lines 9-18 enters an RCU read-side critical section, loads the +list header "y" and dereferences it, leaving the node in "0:r0" and +the node's value in "0:r1". + +P1() on lines 20-24 updates the list header to instead reference "z", +then emulates call_rcu() by doing a release store into "c". + +P2() on lines 27-33 emulates the behind-the-scenes effect of doing a +call_rcu(). Line 30 first does an acquire load from "c", then line 31 +waits for an RCU grace period to elapse, and finally line 32 emulates +the RCU callback, which in turn emulates a call to kfree(). + +Of course, it is possible for P2() to start too soon, so that the +value of "2:r0" is zero rather than the required value of "1". +The "filter" clause on line 35 handles this possibility, rejecting +all executions in which "2:r0" is not equal to the value "1". + + +Performance +----------- + +LKMM's exploration of the full state-space can be extremely helpful, +but it does not come for free. The price is exponential computational +complexity in terms of the number of processes, the average number +of statements in each process, and the total number of stores in the +litmus test. + +So it is best to start small and then work up. Where possible, break +your code down into small pieces each representing a core concurrency +requirement. + +That said, herd7 is quite fast. On an unprepossessing x86 laptop, it +was able to analyze the following 10-process RCU litmus test in about +six seconds. + +https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R.litmus + +One way to make herd7 run faster is to use the "-speedcheck true" option. +This option prevents herd7 from generating all possible end states, +instead causing it to focus solely on whether or not the "exists" +clause can be satisfied. With this option, herd7 evaluates the above +litmus test in about 300 milliseconds, for more than an order of magnitude +improvement in performance. + +Larger 16-process litmus tests that would normally consume 15 minutes +of time complete in about 40 seconds with this option. To be fair, +you do get an extra 65,535 states when you leave off the "-speedcheck +true" option. + +https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R.litmus + +Nevertheless, litmus-test analysis really is of exponential complexity, +whether with or without "-speedcheck true". Increasing by just three +processes to a 19-process litmus test requires 2 hours and 40 minutes +without, and about 8 minutes with "-speedcheck true". Each of these +results represent roughly an order of magnitude slowdown compared to the +16-process litmus test. Again, to be fair, the multi-hour run explores +no fewer than 524,287 additional states compared to the shorter one. + +https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R.litmus + +If you don't like command-line arguments, you can obtain a similar speedup +by adding a "filter" clause with exactly the same expression as your +"exists" clause. + +However, please note that seeing the full set of states can be extremely +helpful when developing and debugging litmus tests. + + +LIMITATIONS +=========== + +Limitations of the Linux-kernel memory model (LKMM) include: + +1. Compiler optimizations are not accurately modeled. Of course, + the use of READ_ONCE() and WRITE_ONCE() limits the compiler's + ability to optimize, but under some circumstances it is possible + for the compiler to undermine the memory model. For more + information, see Documentation/explanation.txt (in particular, + the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING" + sections). + + Note that this limitation in turn limits LKMM's ability to + accurately model address, control, and data dependencies. + For example, if the compiler can deduce the value of some variable + carrying a dependency, then the compiler can break that dependency + by substituting a constant of that value. + +2. Multiple access sizes for a single variable are not supported, + and neither are misaligned or partially overlapping accesses. + +3. Exceptions and interrupts are not modeled. In some cases, + this limitation can be overcome by modeling the interrupt or + exception with an additional process. + +4. I/O such as MMIO or DMA is not supported. + +5. Self-modifying code (such as that found in the kernel's + alternatives mechanism, function tracer, Berkeley Packet Filter + JIT compiler, and module loader) is not supported. + +6. Complete modeling of all variants of atomic read-modify-write + operations, locking primitives, and RCU is not provided. + For example, call_rcu() and rcu_barrier() are not supported. + However, a substantial amount of support is provided for these + operations, as shown in the linux-kernel.def file. + + Here are specific limitations: + + a. When rcu_assign_pointer() is passed NULL, the Linux + kernel provides no ordering, but LKMM models this + case as a store release. + + b. The "unless" RMW operations are not currently modeled: + atomic_long_add_unless(), atomic_inc_unless_negative(), + and atomic_dec_unless_positive(). These can be emulated + in litmus tests, for example, by using atomic_cmpxchg(). + + One exception of this limitation is atomic_add_unless(), + which is provided directly by herd7 (so no corresponding + definition in linux-kernel.def). atomic_add_unless() is + modeled by herd7 therefore it can be used in litmus tests. + + c. The call_rcu() function is not modeled. As was shown above, + it can be emulated in litmus tests by adding another + process that invokes synchronize_rcu() and the body of the + callback function, with (for example) a release-acquire + from the site of the emulated call_rcu() to the beginning + of the additional process. + + d. The rcu_barrier() function is not modeled. It can be + emulated in litmus tests emulating call_rcu() via + (for example) a release-acquire from the end of each + additional call_rcu() process to the site of the + emulated rcu-barrier(). + + e. Although sleepable RCU (SRCU) is now modeled, there + are some subtle differences between its semantics and + those in the Linux kernel. For example, the kernel + might interpret the following sequence as two partially + overlapping SRCU read-side critical sections: + + 1 r1 = srcu_read_lock(&my_srcu); + 2 do_something_1(); + 3 r2 = srcu_read_lock(&my_srcu); + 4 do_something_2(); + 5 srcu_read_unlock(&my_srcu, r1); + 6 do_something_3(); + 7 srcu_read_unlock(&my_srcu, r2); + + In contrast, LKMM will interpret this as a nested pair of + SRCU read-side critical sections, with the outer critical + section spanning lines 1-7 and the inner critical section + spanning lines 3-5. + + This difference would be more of a concern had anyone + identified a reasonable use case for partially overlapping + SRCU read-side critical sections. For more information + on the trickiness of such overlapping, please see: + https://paulmck.livejournal.com/40593.html + + f. Reader-writer locking is not modeled. It can be + emulated in litmus tests using atomic read-modify-write + operations. + +The fragment of the C language supported by these litmus tests is quite +limited and in some ways non-standard: + +1. There is no automatic C-preprocessor pass. You can of course + run it manually, if you choose. + +2. There is no way to create functions other than the Pn() functions + that model the concurrent processes. + +3. The Pn() functions' formal parameters must be pointers to the + global shared variables. Nothing can be passed by value into + these functions. + +4. The only functions that can be invoked are those built directly + into herd7 or that are defined in the linux-kernel.def file. + +5. The "switch", "do", "for", "while", and "goto" C statements are + not supported. The "switch" statement can be emulated by the + "if" statement. The "do", "for", and "while" statements can + often be emulated by manually unrolling the loop, or perhaps by + enlisting the aid of the C preprocessor to minimize the resulting + code duplication. Some uses of "goto" can be emulated by "if", + and some others by unrolling. + +6. Although you can use a wide variety of types in litmus-test + variable declarations, and especially in global-variable + declarations, the "herd7" tool understands only int and + pointer types. There is no support for floating-point types, + enumerations, characters, strings, arrays, or structures. + +7. Parsing of variable declarations is very loose, with almost no + type checking. + +8. Initializers differ from their C-language counterparts. + For example, when an initializer contains the name of a shared + variable, that name denotes a pointer to that variable, not + the current value of that variable. For example, "int x = y" + is interpreted the way "int x = &y" would be in C. + +9. Dynamic memory allocation is not supported, although this can + be worked around in some cases by supplying multiple statically + allocated variables. + +Some of these limitations may be overcome in the future, but others are +more likely to be addressed by incorporating the Linux-kernel memory model +into other tools. + +Finally, please note that LKMM is subject to change as hardware, use cases, +and compilers evolve. diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt index 63c4adfed884..03f58b11c252 100644 --- a/tools/memory-model/Documentation/recipes.txt +++ b/tools/memory-model/Documentation/recipes.txt @@ -1,7 +1,7 @@ This document provides "recipes", that is, litmus tests for commonly occurring situations, as well as a few that illustrate subtly broken but attractive nuisances. Many of these recipes include example code from -v4.13 of the Linux kernel. +v5.7 of the Linux kernel. The first section covers simple special cases, the second section takes off the training wheels to cover more involved examples, @@ -278,7 +278,7 @@ is present if the value loaded determines the address of a later access first place (control dependency). Note that the term "data dependency" is sometimes casually used to cover both address and data dependencies. -In lib/prime_numbers.c, the expand_to_next_prime() function invokes +In lib/math/prime_numbers.c, the expand_to_next_prime() function invokes rcu_assign_pointer(), and the next_prime_number() function invokes rcu_dereference(). This combination mediates access to a bit vector that is expanded as additional primes are needed. diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt index ecbbaa5396d4..c5fdfd19df24 100644 --- a/tools/memory-model/Documentation/references.txt +++ b/tools/memory-model/Documentation/references.txt @@ -120,7 +120,7 @@ o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and semantics of the weak consistency model specification language - cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531 + cat". CoRR abs/1608.07531 (2016). https://arxiv.org/abs/1608.07531 Memory-model comparisons diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt new file mode 100644 index 000000000000..81e1a0ec5342 --- /dev/null +++ b/tools/memory-model/Documentation/simple.txt @@ -0,0 +1,271 @@ +This document provides options for those wishing to keep their +memory-ordering lives simple, as is necessary for those whose domain +is complex. After all, there are bugs other than memory-ordering bugs, +and the time spent gaining memory-ordering knowledge is not available +for gaining domain knowledge. Furthermore Linux-kernel memory model +(LKMM) is quite complex, with subtle differences in code often having +dramatic effects on correctness. + +The options near the beginning of this list are quite simple. The idea +is not that kernel hackers don't already know about them, but rather +that they might need the occasional reminder. + +Please note that this is a generic guide, and that specific subsystems +will often have special requirements or idioms. For example, developers +of MMIO-based device drivers will often need to use mb(), rmb(), and +wmb(), and therefore might find smp_mb(), smp_rmb(), and smp_wmb() +to be more natural than smp_load_acquire() and smp_store_release(). +On the other hand, those coming in from other environments will likely +be more familiar with these last two. + + +Single-threaded code +==================== + +In single-threaded code, there is no reordering, at least assuming +that your toolchain and hardware are working correctly. In addition, +it is generally a mistake to assume your code will only run in a single +threaded context as the kernel can enter the same code path on multiple +CPUs at the same time. One important exception is a function that makes +no external data references. + +In the general case, you will need to take explicit steps to ensure that +your code really is executed within a single thread that does not access +shared variables. A simple way to achieve this is to define a global lock +that you acquire at the beginning of your code and release at the end, +taking care to ensure that all references to your code's shared data are +also carried out under that same lock. Because only one thread can hold +this lock at a given time, your code will be executed single-threaded. +This approach is called "code locking". + +Code locking can severely limit both performance and scalability, so it +should be used with caution, and only on code paths that execute rarely. +After all, a huge amount of effort was required to remove the Linux +kernel's old "Big Kernel Lock", so let's please be very careful about +adding new "little kernel locks". + +One of the advantages of locking is that, in happy contrast with the +year 1981, almost all kernel developers are very familiar with locking. +The Linux kernel's lockdep (CONFIG_PROVE_LOCKING=y) is very helpful with +the formerly feared deadlock scenarios. + +Please use the standard locking primitives provided by the kernel rather +than rolling your own. For one thing, the standard primitives interact +properly with lockdep. For another thing, these primitives have been +tuned to deal better with high contention. And for one final thing, it is +surprisingly hard to correctly code production-quality lock acquisition +and release functions. After all, even simple non-production-quality +locking functions must carefully prevent both the CPU and the compiler +from moving code in either direction across the locking function. + +Despite the scalability limitations of single-threaded code, RCU +takes this approach for much of its grace-period processing and also +for early-boot operation. The reason RCU is able to scale despite +single-threaded grace-period processing is use of batching, where all +updates that accumulated during one grace period are handled by the +next one. In other words, slowing down grace-period processing makes +it more efficient. Nor is RCU unique: Similar batching optimizations +are used in many I/O operations. + + +Packaged code +============= + +Even if performance and scalability concerns prevent your code from +being completely single-threaded, it is often possible to use library +functions that handle the concurrency nearly or entirely on their own. +This approach delegates any LKMM worries to the library maintainer. + +In the kernel, what is the "library"? Quite a bit. It includes the +contents of the lib/ directory, much of the include/linux/ directory along +with a lot of other heavily used APIs. But heavily used examples include +the list macros (for example, include/linux/{,rcu}list.h), workqueues, +smp_call_function(), and the various hash tables and search trees. + + +Data locking +============ + +With code locking, we use single-threaded code execution to guarantee +serialized access to the data that the code is accessing. However, +we can also achieve this by instead associating the lock with specific +instances of the data structures. This creates a "critical section" +in the code execution that will execute as though it is single threaded. +By placing all the accesses and modifications to a shared data structure +inside a critical section, we ensure that the execution context that +holds the lock has exclusive access to the shared data. + +The poster boy for this approach is the hash table, where placing a lock +in each hash bucket allows operations on different buckets to proceed +concurrently. This works because the buckets do not overlap with each +other, so that an operation on one bucket does not interfere with any +other bucket. + +As the number of buckets increases, data locking scales naturally. +In particular, if the amount of data increases with the number of CPUs, +increasing the number of buckets as the number of CPUs increase results +in a naturally scalable data structure. + + +Per-CPU processing +================== + +Partitioning processing and data over CPUs allows each CPU to take +a single-threaded approach while providing excellent performance and +scalability. Of course, there is no free lunch: The dark side of this +excellence is substantially increased memory footprint. + +In addition, it is sometimes necessary to occasionally update some global +view of this processing and data, in which case something like locking +must be used to protect this global view. This is the approach taken +by the percpu_counter infrastructure. In many cases, there are already +generic/library variants of commonly used per-cpu constructs available. +Please use them rather than rolling your own. + +RCU uses DEFINE_PER_CPU*() declaration to create a number of per-CPU +data sets. For example, each CPU does private quiescent-state processing +within its instance of the per-CPU rcu_data structure, and then uses data +locking to report quiescent states up the grace-period combining tree. + + +Packaged primitives: Sequence locking +===================================== + +Lockless programming is considered by many to be more difficult than +lock-based programming, but there are a few lockless design patterns that +have been built out into an API. One of these APIs is sequence locking. +Although this APIs can be used in extremely complex ways, there are simple +and effective ways of using it that avoid the need to pay attention to +memory ordering. + +The basic keep-things-simple rule for sequence locking is "do not write +in read-side code". Yes, you can do writes from within sequence-locking +readers, but it won't be so simple. For example, such writes will be +lockless and should be idempotent. + +For more sophisticated use cases, LKMM can guide you, including use +cases involving combining sequence locking with other synchronization +primitives. (LKMM does not yet know about sequence locking, so it is +currently necessary to open-code it in your litmus tests.) + +Additional information may be found in include/linux/seqlock.h. + +Packaged primitives: RCU +======================== + +Another lockless design pattern that has been baked into an API +is RCU. The Linux kernel makes sophisticated use of RCU, but the +keep-things-simple rules for RCU are "do not write in read-side code" +and "do not update anything that is visible to and accessed by readers", +and "protect updates with locking". + +These rules are illustrated by the functions foo_update_a() and +foo_get_a() shown in Documentation/RCU/whatisRCU.rst. Additional +RCU usage patterns maybe found in Documentation/RCU and in the +source code. + + +Packaged primitives: Atomic operations +====================================== + +Back in the day, the Linux kernel had three types of atomic operations: + +1. Initialization and read-out, such as atomic_set() and atomic_read(). + +2. Operations that did not return a value and provided no ordering, + such as atomic_inc() and atomic_dec(). + +3. Operations that returned a value and provided full ordering, such as + atomic_add_return() and atomic_dec_and_test(). Note that some + value-returning operations provide full ordering only conditionally. + For example, cmpxchg() provides ordering only upon success. + +More recent kernels have operations that return a value but do not +provide full ordering. These are flagged with either a _relaxed() +suffix (providing no ordering), or an _acquire() or _release() suffix +(providing limited ordering). + +Additional information may be found in these files: + +Documentation/atomic_t.txt +Documentation/atomic_bitops.txt +Documentation/core-api/atomic_ops.rst +Documentation/core-api/refcount-vs-atomic.rst + +Reading code using these primitives is often also quite helpful. + + +Lockless, fully ordered +======================= + +When using locking, there often comes a time when it is necessary +to access some variable or another without holding the data lock +that serializes access to that variable. + +If you want to keep things simple, use the initialization and read-out +operations from the previous section only when there are no racing +accesses. Otherwise, use only fully ordered operations when accessing +or modifying the variable. This approach guarantees that code prior +to a given access to that variable will be seen by all CPUs has having +happened before any code following any later access to that same variable. + +Please note that per-CPU functions are not atomic operations and +hence they do not provide any ordering guarantees at all. + +If the lockless accesses are frequently executed reads that are used +only for heuristics, or if they are frequently executed writes that +are used only for statistics, please see the next section. + + +Lockless statistics and heuristics +================================== + +Unordered primitives such as atomic_read(), atomic_set(), READ_ONCE(), and +WRITE_ONCE() can safely be used in some cases. These primitives provide +no ordering, but they do prevent the compiler from carrying out a number +of destructive optimizations (for which please see the next section). +One example use for these primitives is statistics, such as per-CPU +counters exemplified by the rt_cache_stat structure's routing-cache +statistics counters. Another example use case is heuristics, such as +the jiffies_till_first_fqs and jiffies_till_next_fqs kernel parameters +controlling how often RCU scans for idle CPUs. + +But be careful. "Unordered" really does mean "unordered". It is all +too easy to assume ordering, and this assumption must be avoided when +using these primitives. + + +Don't let the compiler trip you up +================================== + +It can be quite tempting to use plain C-language accesses for lockless +loads from and stores to shared variables. Although this is both +possible and quite common in the Linux kernel, it does require a +surprising amount of analysis, care, and knowledge about the compiler. +Yes, some decades ago it was not unfair to consider a C compiler to be +an assembler with added syntax and better portability, but the advent of +sophisticated optimizing compilers mean that those days are long gone. +Today's optimizing compilers can profoundly rewrite your code during the +translation process, and have long been ready, willing, and able to do so. + +Therefore, if you really need to use C-language assignments instead of +READ_ONCE(), WRITE_ONCE(), and so on, you will need to have a very good +understanding of both the C standard and your compiler. Here are some +introductory references and some tooling to start you on this noble quest: + +Who's afraid of a big bad optimizing compiler? + https://lwn.net/Articles/793253/ +Calibrating your fear of big bad optimizing compilers + https://lwn.net/Articles/799218/ +Concurrency bugs should fear the big bad data-race detector (part 1) + https://lwn.net/Articles/816850/ +Concurrency bugs should fear the big bad data-race detector (part 2) + https://lwn.net/Articles/816854/ + + +More complex use cases +====================== + +If the alternatives above do not do what you need, please look at the +recipes-pairs.txt file to peel off the next layer of the memory-ordering +onion. diff --git a/tools/memory-model/README b/tools/memory-model/README index ecb7385376bf..c8144d4aafa0 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -63,10 +63,32 @@ BASIC USAGE: HERD7 ================== The memory model is used, in conjunction with "herd7", to exhaustively -explore the state space of small litmus tests. +explore the state space of small litmus tests. Documentation describing +the format, features, capabilities and limitations of these litmus +tests is available in tools/memory-model/Documentation/litmus-tests.txt. -For example, to run SB+fencembonceonces.litmus against the memory model: +Example litmus tests may be found in the Linux-kernel source tree: + tools/memory-model/litmus-tests/ + Documentation/litmus-tests/ + +Several thousand more example litmus tests are available here: + + https://github.com/paulmckrcu/litmus + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus + +Documentation describing litmus tests and now to use them may be found +here: + + tools/memory-model/Documentation/litmus-tests.txt + +The remainder of this section uses the SB+fencembonceonces.litmus test +located in the tools/memory-model directory. + +To run SB+fencembonceonces.litmus against the memory model: + + $ cd $LINUX_SOURCE_TREE/tools/memory-model $ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus Here is the corresponding output: @@ -87,7 +109,11 @@ Here is the corresponding output: The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that this litmus test's "exists" clause can not be satisfied. -See "herd7 -help" or "herdtools7/doc/" for more information. +See "herd7 -help" or "herdtools7/doc/" for more information on running the +tool itself, but please be aware that this documentation is intended for +people who work on the memory model itself, that is, people making changes +to the tools/memory-model/linux-kernel.* files. It is not intended for +people focusing on writing, understanding, and running LKMM litmus tests. ===================== @@ -124,7 +150,11 @@ that during two million trials, the state specified in this litmus test's "exists" clause was not reached. And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/" -for more information. +for more information. And again, please be aware that this documentation +is intended for people who work on the memory model itself, that is, +people making changes to the tools/memory-model/linux-kernel.* files. +It is not intended for people focusing on writing, understanding, and +running LKMM litmus tests. ==================== @@ -137,12 +167,21 @@ Documentation/cheatsheet.txt Documentation/explanation.txt Describes the memory model in detail. +Documentation/litmus-tests.txt + Describes the format, features, capabilities, and limitations + of the litmus tests that LKMM can evaluate. + Documentation/recipes.txt Lists common memory-ordering patterns. Documentation/references.txt Provides background reading. +Documentation/simple.txt + Starting point for someone new to Linux-kernel concurrency. + And also for those needing a reminder of the simpler approaches + to concurrency! + linux-kernel.bell Categorizes the relevant instructions, including memory references, memory barriers, atomic read-modify-write operations, @@ -187,116 +226,3 @@ README This file. scripts Various scripts, see scripts/README. - - -=========== -LIMITATIONS -=========== - -The Linux-kernel memory model (LKMM) has the following limitations: - -1. Compiler optimizations are not accurately modeled. Of course, - the use of READ_ONCE() and WRITE_ONCE() limits the compiler's - ability to optimize, but under some circumstances it is possible - for the compiler to undermine the memory model. For more - information, see Documentation/explanation.txt (in particular, - the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING" - sections). - - Note that this limitation in turn limits LKMM's ability to - accurately model address, control, and data dependencies. - For example, if the compiler can deduce the value of some variable - carrying a dependency, then the compiler can break that dependency - by substituting a constant of that value. - -2. Multiple access sizes for a single variable are not supported, - and neither are misaligned or partially overlapping accesses. - -3. Exceptions and interrupts are not modeled. In some cases, - this limitation can be overcome by modeling the interrupt or - exception with an additional process. - -4. I/O such as MMIO or DMA is not supported. - -5. Self-modifying code (such as that found in the kernel's - alternatives mechanism, function tracer, Berkeley Packet Filter - JIT compiler, and module loader) is not supported. - -6. Complete modeling of all variants of atomic read-modify-write - operations, locking primitives, and RCU is not provided. - For example, call_rcu() and rcu_barrier() are not supported. - However, a substantial amount of support is provided for these - operations, as shown in the linux-kernel.def file. - - a. When rcu_assign_pointer() is passed NULL, the Linux - kernel provides no ordering, but LKMM models this - case as a store release. - - b. The "unless" RMW operations are not currently modeled: - atomic_long_add_unless(), atomic_inc_unless_negative(), - and atomic_dec_unless_positive(). These can be emulated - in litmus tests, for example, by using atomic_cmpxchg(). - - One exception of this limitation is atomic_add_unless(), - which is provided directly by herd7 (so no corresponding - definition in linux-kernel.def). atomic_add_unless() is - modeled by herd7 therefore it can be used in litmus tests. - - c. The call_rcu() function is not modeled. It can be - emulated in litmus tests by adding another process that - invokes synchronize_rcu() and the body of the callback - function, with (for example) a release-acquire from - the site of the emulated call_rcu() to the beginning - of the additional process. - - d. The rcu_barrier() function is not modeled. It can be - emulated in litmus tests emulating call_rcu() via - (for example) a release-acquire from the end of each - additional call_rcu() process to the site of the - emulated rcu-barrier(). - - e. Although sleepable RCU (SRCU) is now modeled, there - are some subtle differences between its semantics and - those in the Linux kernel. For example, the kernel - might interpret the following sequence as two partially - overlapping SRCU read-side critical sections: - - 1 r1 = srcu_read_lock(&my_srcu); - 2 do_something_1(); - 3 r2 = srcu_read_lock(&my_srcu); - 4 do_something_2(); - 5 srcu_read_unlock(&my_srcu, r1); - 6 do_something_3(); - 7 srcu_read_unlock(&my_srcu, r2); - - In contrast, LKMM will interpret this as a nested pair of - SRCU read-side critical sections, with the outer critical - section spanning lines 1-7 and the inner critical section - spanning lines 3-5. - - This difference would be more of a concern had anyone - identified a reasonable use case for partially overlapping - SRCU read-side critical sections. For more information, - please see: https://paulmck.livejournal.com/40593.html - - f. Reader-writer locking is not modeled. It can be - emulated in litmus tests using atomic read-modify-write - operations. - -The "herd7" tool has some additional limitations of its own, apart from -the memory model: - -1. Non-trivial data structures such as arrays or structures are - not supported. However, pointers are supported, allowing trivial - linked lists to be constructed. - -2. Dynamic memory allocation is not supported, although this can - be worked around in some cases by supplying multiple statically - allocated variables. - -Some of these limitations may be overcome in the future, but others are -more likely to be addressed by incorporating the Linux-kernel memory model -into other tools. - -Finally, please note that LKMM is subject to change as hardware, use cases, -and compilers evolve. diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 42ac19e0299c..326ac390168b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -16,6 +16,7 @@ #include <linux/hashtable.h> #include <linux/kernel.h> +#include <linux/static_call_types.h> #define FAKE_JUMP_OFFSET -1 @@ -433,6 +434,103 @@ reachable: return 0; } +static int create_static_call_sections(struct objtool_file *file) +{ + struct section *sec, *reloc_sec; + struct reloc *reloc; + struct static_call_site *site; + struct instruction *insn; + struct symbol *key_sym; + char *key_name, *tmp; + int idx; + + sec = find_section_by_name(file->elf, ".static_call_sites"); + if (sec) { + INIT_LIST_HEAD(&file->static_call_list); + WARN("file already has .static_call_sites section, skipping"); + return 0; + } + + if (list_empty(&file->static_call_list)) + return 0; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) + idx++; + + sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, + sizeof(struct static_call_site), idx); + if (!sec) + return -1; + + reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); + if (!reloc_sec) + return -1; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) { + + site = (struct static_call_site *)sec->data->d_buf + idx; + memset(site, 0, sizeof(struct static_call_site)); + + /* populate reloc for 'addr' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = insn->sec->sym; + reloc->addend = insn->offset; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site); + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + /* find key symbol */ + key_name = strdup(insn->call_dest->name); + if (!key_name) { + perror("strdup"); + return -1; + } + if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR, + STATIC_CALL_TRAMP_PREFIX_LEN)) { + WARN("static_call: trampoline name malformed: %s", key_name); + return -1; + } + tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN; + memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN); + + key_sym = find_symbol_by_name(file->elf, tmp); + if (!key_sym) { + WARN("static_call: can't find static_call_key symbol: %s", tmp); + return -1; + } + free(key_name); + + /* populate reloc for 'key' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = key_sym; + reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site) + 4; + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + idx++; + } + + if (elf_rebuild_reloc_section(file->elf, reloc_sec)) + return -1; + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. */ @@ -528,6 +626,61 @@ static const char *uaccess_safe_builtin[] = { "__tsan_write4", "__tsan_write8", "__tsan_write16", + "__tsan_read_write1", + "__tsan_read_write2", + "__tsan_read_write4", + "__tsan_read_write8", + "__tsan_read_write16", + "__tsan_atomic8_load", + "__tsan_atomic16_load", + "__tsan_atomic32_load", + "__tsan_atomic64_load", + "__tsan_atomic8_store", + "__tsan_atomic16_store", + "__tsan_atomic32_store", + "__tsan_atomic64_store", + "__tsan_atomic8_exchange", + "__tsan_atomic16_exchange", + "__tsan_atomic32_exchange", + "__tsan_atomic64_exchange", + "__tsan_atomic8_fetch_add", + "__tsan_atomic16_fetch_add", + "__tsan_atomic32_fetch_add", + "__tsan_atomic64_fetch_add", + "__tsan_atomic8_fetch_sub", + "__tsan_atomic16_fetch_sub", + "__tsan_atomic32_fetch_sub", + "__tsan_atomic64_fetch_sub", + "__tsan_atomic8_fetch_and", + "__tsan_atomic16_fetch_and", + "__tsan_atomic32_fetch_and", + "__tsan_atomic64_fetch_and", + "__tsan_atomic8_fetch_or", + "__tsan_atomic16_fetch_or", + "__tsan_atomic32_fetch_or", + "__tsan_atomic64_fetch_or", + "__tsan_atomic8_fetch_xor", + "__tsan_atomic16_fetch_xor", + "__tsan_atomic32_fetch_xor", + "__tsan_atomic64_fetch_xor", + "__tsan_atomic8_fetch_nand", + "__tsan_atomic16_fetch_nand", + "__tsan_atomic32_fetch_nand", + "__tsan_atomic64_fetch_nand", + "__tsan_atomic8_compare_exchange_strong", + "__tsan_atomic16_compare_exchange_strong", + "__tsan_atomic32_compare_exchange_strong", + "__tsan_atomic64_compare_exchange_strong", + "__tsan_atomic8_compare_exchange_weak", + "__tsan_atomic16_compare_exchange_weak", + "__tsan_atomic32_compare_exchange_weak", + "__tsan_atomic64_compare_exchange_weak", + "__tsan_atomic8_compare_exchange_val", + "__tsan_atomic16_compare_exchange_val", + "__tsan_atomic32_compare_exchange_val", + "__tsan_atomic64_compare_exchange_val", + "__tsan_atomic_thread_fence", + "__tsan_atomic_signal_fence", /* KCOV */ "write_comp_data", "check_kcov_mode", @@ -650,6 +803,10 @@ static int add_jump_destinations(struct objtool_file *file) } else { /* external sibling call */ insn->call_dest = reloc->sym; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } continue; } @@ -701,6 +858,10 @@ static int add_jump_destinations(struct objtool_file *file) /* internal sibling call */ insn->call_dest = insn->jump_dest->func; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } } } } @@ -1523,6 +1684,23 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } +static int read_static_call_tramps(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->bind == STB_GLOBAL && + !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + strlen(STATIC_CALL_TRAMP_PREFIX_STR))) + func->static_call_tramp = true; + } + } + + return 0; +} + static void mark_rodata(struct objtool_file *file) { struct section *sec; @@ -1570,6 +1748,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_static_call_tramps(file); + if (ret) + return ret; + ret = add_jump_destinations(file); if (ret) return ret; @@ -2433,6 +2615,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (dead_end_function(file, insn->call_dest)) return 0; + if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } + break; case INSN_JUMP_CONDITIONAL: @@ -2792,6 +2979,7 @@ int check(const char *_objname, bool orc) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.static_call_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); file.ignore_unreachables = no_unreachable; file.hints = false; @@ -2839,6 +3027,11 @@ int check(const char *_objname, bool orc) warnings += ret; } + ret = create_static_call_sections(&file); + if (ret < 0) + goto out; + warnings += ret; + if (orc) { ret = create_orc(&file); if (ret < 0) diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 061aa96e15d3..36d38b9153ac 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -22,6 +22,7 @@ struct insn_state { struct instruction { struct list_head list; struct hlist_node hash; + struct list_head static_call_node; struct section *sec; unsigned long offset; unsigned int len; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3ddbd66f1a37..4e1d7460574b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -652,7 +652,7 @@ err: } struct section *elf_create_section(struct elf *elf, const char *name, - size_t entsize, int nr) + unsigned int sh_flags, size_t entsize, int nr) { struct section *sec, *shstrtab; size_t size = entsize * nr; @@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_entsize = entsize; sec->sh.sh_type = SHT_PROGBITS; sec->sh.sh_addralign = 1; - sec->sh.sh_flags = SHF_ALLOC; + sec->sh.sh_flags = SHF_ALLOC | sh_flags; /* Add section name to .shstrtab (or .strtab for Clang) */ @@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect strcpy(relocname, ".rel"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0); free(relocname); if (!sec) return NULL; @@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec strcpy(relocname, ".rela"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0); free(relocname); if (!sec) return NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 6cc80a075166..807f8c670097 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -56,6 +56,7 @@ struct symbol { unsigned int len; struct symbol *pfunc, *cfunc, *alias; bool uaccess_safe; + bool static_call_tramp; }; struct reloc { @@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc) } struct elf *elf_open_read(const char *name, int flags); -struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr); +struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); void elf_add_reloc(struct elf *elf, struct reloc *reloc); int elf_write_insn(struct elf *elf, struct section *sec, diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index 528028a66816..9a7cd0b88bd8 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -16,6 +16,7 @@ struct objtool_file { struct elf *elf; struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); + struct list_head static_call_list; bool ignore_unreachables, c_file, hints, rodata; }; diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 968f55e6dd94..e6b2363c2e03 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -177,7 +177,7 @@ int create_orc_sections(struct objtool_file *file) /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ - sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx); if (!sec) return -1; @@ -186,7 +186,7 @@ int create_orc_sections(struct objtool_file *file) return -1; /* create .orc_unwind section */ - u_sec = elf_create_section(file->elf, ".orc_unwind", + u_sec = elf_create_section(file->elf, ".orc_unwind", 0, sizeof(struct orc_entry), idx); /* populate sections */ diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 2a1261bfbb62..aa099b21dffa 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -7,6 +7,7 @@ arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk +include/linux/static_call_types.h ' check_2 () { diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3ca6fe057a0b..b168364ac050 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -32,7 +32,7 @@ 18 spu oldstat sys_ni_syscall 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid -21 nospu mount sys_mount compat_sys_mount +21 nospu mount sys_mount 22 32 umount sys_oldumount 22 64 umount sys_ni_syscall 22 spu umount sys_ni_syscall @@ -189,8 +189,8 @@ 142 common _newselect sys_select compat_sys_select 143 common flock sys_flock 144 common msync sys_msync -145 common readv sys_readv compat_sys_readv -146 common writev sys_writev compat_sys_writev +145 common readv sys_readv +146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync 149 nospu _sysctl sys_ni_syscall @@ -363,7 +363,7 @@ 282 common unshare sys_unshare 283 common splice sys_splice 284 common tee sys_tee -285 common vmsplice sys_vmsplice compat_sys_vmsplice +285 common vmsplice sys_vmsplice 286 common openat sys_openat compat_sys_openat 287 common mkdirat sys_mkdirat 288 common mknodat sys_mknodat @@ -443,8 +443,8 @@ 348 common syncfs sys_syncfs 349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 350 common setns sys_setns -351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +351 nospu process_vm_readv sys_process_vm_readv +352 nospu process_vm_writev sys_process_vm_writev 353 nospu finit_module sys_finit_module 354 nospu kcmp sys_kcmp 355 common sched_setattr sys_sched_setattr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 6a0bbea225db..d2fa9647ce25 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -26,7 +26,7 @@ 16 32 lchown - compat_sys_s390_lchown16 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid sys_getpid -21 common mount sys_mount compat_sys_mount +21 common mount sys_mount 22 common umount sys_oldumount compat_sys_oldumount 23 32 setuid - compat_sys_s390_setuid16 24 32 getuid - compat_sys_s390_getuid16 @@ -134,8 +134,8 @@ 142 64 select sys_select - 143 common flock sys_flock sys_flock 144 common msync sys_msync compat_sys_msync -145 common readv sys_readv compat_sys_readv -146 common writev sys_writev compat_sys_writev +145 common readv sys_readv +146 common writev sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync 149 common _sysctl - - @@ -316,7 +316,7 @@ 306 common splice sys_splice compat_sys_splice 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range 308 common tee sys_tee compat_sys_tee -309 common vmsplice sys_vmsplice compat_sys_vmsplice +309 common vmsplice sys_vmsplice sys_vmsplice 310 common move_pages sys_move_pages compat_sys_move_pages 311 common getcpu sys_getcpu compat_sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait @@ -347,8 +347,8 @@ 337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime 338 common syncfs sys_syncfs sys_syncfs 339 common setns sys_setns sys_setns -340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev 342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr 343 common kcmp sys_kcmp compat_sys_kcmp 344 common finit_module sys_finit_module compat_sys_finit_module diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index f30d6ae9a688..347809649ba2 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -371,8 +371,8 @@ 512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn 514 x32 ioctl compat_sys_ioctl -515 x32 readv compat_sys_readv -516 x32 writev compat_sys_writev +515 x32 readv sys_readv +516 x32 writev sys_writev 517 x32 recvfrom compat_sys_recvfrom 518 x32 sendmsg compat_sys_sendmsg 519 x32 recvmsg compat_sys_recvmsg @@ -388,15 +388,15 @@ 529 x32 waitid compat_sys_waitid 530 x32 set_robust_list compat_sys_set_robust_list 531 x32 get_robust_list compat_sys_get_robust_list -532 x32 vmsplice compat_sys_vmsplice +532 x32 vmsplice sys_vmsplice 533 x32 move_pages compat_sys_move_pages 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 537 x32 recvmmsg compat_sys_recvmmsg_time64 538 x32 sendmmsg compat_sys_sendmmsg -539 x32 process_vm_readv compat_sys_process_vm_readv -540 x32 process_vm_writev compat_sys_process_vm_writev +539 x32 process_vm_readv sys_process_vm_readv +540 x32 process_vm_writev sys_process_vm_writev 541 x32 setsockopt sys_setsockopt 542 x32 getsockopt sys_getsockopt 543 x32 io_setup compat_sys_io_setup diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 46ff97e909c6..1bc36a1db14f 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -171,7 +171,7 @@ class SystemValues: tracefuncs = { 'sys_sync': {}, 'ksys_sync': {}, - '__pm_notifier_call_chain': {}, + 'pm_notifier_call_chain_robust': {}, 'pm_prepare_console': {}, 'pm_notifier_call_chain': {}, 'freeze_processes': {}, diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index e8a657a5f48a..384589095864 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: LGPL-2.1 #define _GNU_SOURCE #include <assert.h> +#include <linux/membarrier.h> #include <pthread.h> #include <sched.h> +#include <stdatomic.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -1131,6 +1133,220 @@ static int set_signal_handler(void) return ret; } +struct test_membarrier_thread_args { + int stop; + intptr_t percpu_list_ptr; +}; + +/* Worker threads modify data in their "active" percpu lists. */ +void *test_membarrier_worker_thread(void *arg) +{ + struct test_membarrier_thread_args *args = + (struct test_membarrier_thread_args *)arg; + const int iters = opt_reps; + int i; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + /* Wait for initialization. */ + while (!atomic_load(&args->percpu_list_ptr)) {} + + for (i = 0; i < iters; ++i) { + int ret; + + do { + int cpu = rseq_cpu_start(); + + ret = rseq_offset_deref_addv(&args->percpu_list_ptr, + sizeof(struct percpu_list_entry) * cpu, 1, cpu); + } while (rseq_unlikely(ret)); + } + + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + return NULL; +} + +void test_membarrier_init_percpu_list(struct percpu_list *list) +{ + int i; + + memset(list, 0, sizeof(*list)); + for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_list_node *node; + + node = malloc(sizeof(*node)); + assert(node); + node->data = 0; + node->next = NULL; + list->c[i].head = node; + } +} + +void test_membarrier_free_percpu_list(struct percpu_list *list) +{ + int i; + + for (i = 0; i < CPU_SETSIZE; i++) + free(list->c[i].head); +} + +static int sys_membarrier(int cmd, int flags, int cpu_id) +{ + return syscall(__NR_membarrier, cmd, flags, cpu_id); +} + +/* + * The manager thread swaps per-cpu lists that worker threads see, + * and validates that there are no unexpected modifications. + */ +void *test_membarrier_manager_thread(void *arg) +{ + struct test_membarrier_thread_args *args = + (struct test_membarrier_thread_args *)arg; + struct percpu_list list_a, list_b; + intptr_t expect_a = 0, expect_b = 0; + int cpu_a = 0, cpu_b = 0; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + /* Init lists. */ + test_membarrier_init_percpu_list(&list_a); + test_membarrier_init_percpu_list(&list_b); + + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + + while (!atomic_load(&args->stop)) { + /* list_a is "active". */ + cpu_a = rand() % CPU_SETSIZE; + /* + * As list_b is "inactive", we should never see changes + * to list_b. + */ + if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) { + fprintf(stderr, "Membarrier test failed\n"); + abort(); + } + + /* Make list_b "active". */ + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b); + if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu_a) && + errno != ENXIO /* missing CPU */) { + perror("sys_membarrier"); + abort(); + } + /* + * Cpu A should now only modify list_b, so the values + * in list_a should be stable. + */ + expect_a = atomic_load(&list_a.c[cpu_a].head->data); + + cpu_b = rand() % CPU_SETSIZE; + /* + * As list_a is "inactive", we should never see changes + * to list_a. + */ + if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) { + fprintf(stderr, "Membarrier test failed\n"); + abort(); + } + + /* Make list_a "active". */ + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu_b) && + errno != ENXIO /* missing CPU*/) { + perror("sys_membarrier"); + abort(); + } + /* Remember a value from list_b. */ + expect_b = atomic_load(&list_b.c[cpu_b].head->data); + } + + test_membarrier_free_percpu_list(&list_a); + test_membarrier_free_percpu_list(&list_b); + + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + return NULL; +} + +/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ +#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +void test_membarrier(void) +{ + const int num_threads = opt_threads; + struct test_membarrier_thread_args thread_args; + pthread_t worker_threads[num_threads]; + pthread_t manager_thread; + int i, ret; + + if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) { + perror("sys_membarrier"); + abort(); + } + + thread_args.stop = 0; + thread_args.percpu_list_ptr = 0; + ret = pthread_create(&manager_thread, NULL, + test_membarrier_manager_thread, &thread_args); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_create(&worker_threads[i], NULL, + test_membarrier_worker_thread, &thread_args); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + } + + + for (i = 0; i < num_threads; i++) { + ret = pthread_join(worker_threads[i], NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } + } + + atomic_store(&thread_args.stop, 1); + ret = pthread_join(manager_thread, NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } +} +#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */ +void test_membarrier(void) +{ + fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. " + "Skipping membarrier test.\n"); +} +#endif + static void show_usage(int argc, char **argv) { printf("Usage : %s <OPTIONS>\n", @@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv) printf(" [-r N] Number of repetitions per thread (default 5000)\n"); printf(" [-d] Disable rseq system call (no initialization)\n"); printf(" [-D M] Disable rseq for each M threads\n"); - printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n"); + printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); printf(" [-v] Verbose output.\n"); printf(" [-h] Show this help.\n"); @@ -1268,6 +1484,7 @@ int main(int argc, char **argv) case 'i': case 'b': case 'm': + case 'r': break; default: show_usage(argc, argv); @@ -1320,6 +1537,10 @@ int main(int argc, char **argv) printf_verbose("counter increment\n"); test_percpu_inc(); break; + case 'r': + printf_verbose("membarrier\n"); + test_membarrier(); + break; } if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index b2da6004fe30..640411518e46 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -279,6 +279,63 @@ error1: #endif } +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __attribute__((always_inline)) +int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) +#endif + /* get p+v */ + "movq %[ptr], %%rbx\n\t" + "addq %[off], %%rbx\n\t" + /* get pv */ + "movq (%%rbx), %%rcx\n\t" + /* *pv += inc */ + "addq %[inc], (%%rcx)\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_abi] "r" (&__rseq_abi), + /* final store input */ + [ptr] "m" (*ptr), + [off] "er" (off), + [inc] "er" (inc) + : "memory", "cc", "rax", "rbx", "rcx" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + static inline __attribute__((always_inline)) int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t newv2, diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh index e426304fd4a0..f51bc83c9e41 100755 --- a/tools/testing/selftests/rseq/run_param_test.sh +++ b/tools/testing/selftests/rseq/run_param_test.sh @@ -15,6 +15,7 @@ TEST_LIST=( "-T m" "-T m -M" "-T i" + "-T r" ) TEST_NAME=( @@ -25,6 +26,7 @@ TEST_NAME=( "memcpy" "memcpy with barrier" "increment" + "membarrier" ) IFS="$OLDIFS" diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 998319553523..7161cfc2e60b 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -443,6 +443,68 @@ static void test_unexpected_base(void) #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r) +static void test_ptrace_write_gs_read_base(void) +{ + int status; + pid_t child = fork(); + + if (child < 0) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n"); + + printf("[RUN]\tARCH_SET_GS to 1\n"); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0) + err(1, "ARCH_SET_GS"); + + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) + err(1, "PTRACE_TRACEME"); + + raise(SIGTRAP); + _exit(0); + } + + wait(&status); + + if (WSTOPSIG(status) == SIGTRAP) { + unsigned long base; + unsigned long gs_offset = USER_REGS_OFFSET(gs); + unsigned long base_offset = USER_REGS_OFFSET(gs_base); + + /* Read the initial base. It should be 1. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + if (base == 1) { + printf("[OK]\tGSBASE started at 1\n"); + } else { + nerrs++; + printf("[FAIL]\tGSBASE started at 0x%lx\n", base); + } + + printf("[RUN]\tSet GS = 0x7, read GSBASE\n"); + + /* Poke an LDT selector into GS. */ + if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0) + err(1, "PTRACE_POKEUSER"); + + /* And read the base. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + + if (base == 0 || base == 1) { + printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base); + } else { + nerrs++; + printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base); + } + } + + ptrace(PTRACE_CONT, child, NULL, NULL); + + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); +} + static void test_ptrace_write_gsbase(void) { int status; @@ -517,6 +579,9 @@ static void test_ptrace_write_gsbase(void) END: ptrace(PTRACE_CONT, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); } int main() @@ -526,6 +591,9 @@ int main() shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + /* Do these tests before we have an LDT. */ + test_ptrace_write_gs_read_base(); + /* Probe FSGSBASE */ sethandler(SIGILL, sigill, 0); if (sigsetjmp(jmpbuf, 1) == 0) { |