From 48c96a3685795e52903e60c7ee115e5e22e7d640 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 12 Dec 2014 16:56:01 -0800 Subject: mm/page_owner: keep track of page owners This is the page owner tracking code which is introduced so far ago. It is resident on Andrew's tree, though, nobody tried to upstream so it remain as is. Our company uses this feature actively to debug memory leak or to find a memory hogger so I decide to upstream this feature. This functionality help us to know who allocates the page. When allocating a page, we store some information about allocation in extra memory. Later, if we need to know status of all pages, we can get and analyze it from this stored information. In previous version of this feature, extra memory is statically defined in struct page, but, in this version, extra memory is allocated outside of struct page. It enables us to turn on/off this feature at boottime without considerable memory waste. Although we already have tracepoint for tracing page allocation/free, using it to analyze page owner is rather complex. We need to enlarge the trace buffer for preventing overlapping until userspace program launched. And, launched program continually dump out the trace buffer for later analysis and it would change system behaviour with more possibility rather than just keeping it in memory, so bad for debug. Moreover, we can use page_owner feature further for various purposes. For example, we can use it for fragmentation statistics implemented in this patch. And, I also plan to implement some CMA failure debugging feature using this interface. I'd like to give the credit for all developers contributed this feature, but, it's not easy because I don't know exact history. Sorry about that. Below is people who has "Signed-off-by" in the patches in Andrew's tree. Contributor: Alexander Nyberg Mel Gorman Dave Hansen Minchan Kim Michal Nazarewicz Andrew Morton Jungsoo Son Signed-off-by: Joonsoo Kim Cc: Mel Gorman Cc: Johannes Weiner Cc: Minchan Kim Cc: Dave Hansen Cc: Michal Nazarewicz Cc: Jungsoo Son Cc: Ingo Molnar Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/Makefile | 4 +- tools/vm/page_owner_sort.c | 144 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 tools/vm/page_owner_sort.c (limited to 'tools') diff --git a/tools/vm/Makefile b/tools/vm/Makefile index 3d907dacf2ac..ac884b65a072 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -1,6 +1,6 @@ # Makefile for vm tools # -TARGETS=page-types slabinfo +TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api LIBS = $(LIB_DIR)/libapikfs.a @@ -18,5 +18,5 @@ $(LIBS): $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) clean: - $(RM) page-types slabinfo + $(RM) page-types slabinfo page_owner_sort make -C $(LIB_DIR) clean diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c new file mode 100644 index 000000000000..77147b42d598 --- /dev/null +++ b/tools/vm/page_owner_sort.c @@ -0,0 +1,144 @@ +/* + * User-space helper to sort the output of /sys/kernel/debug/page_owner + * + * Example use: + * cat /sys/kernel/debug/page_owner > page_owner_full.txt + * grep -v ^PFN page_owner_full.txt > page_owner.txt + * ./sort page_owner.txt sorted_page_owner.txt +*/ + +#include +#include +#include +#include +#include +#include +#include + +struct block_list { + char *txt; + int len; + int num; +}; + + +static struct block_list *list; +static int list_size; +static int max_size; + +struct block_list *block_head; + +int read_block(char *buf, int buf_size, FILE *fin) +{ + char *curr = buf, *const buf_end = buf + buf_size; + + while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) { + if (*curr == '\n') /* empty line */ + return curr - buf; + curr += strlen(curr); + } + + return -1; /* EOF or no space left in buf. */ +} + +static int compare_txt(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return strcmp(l1->txt, l2->txt); +} + +static int compare_num(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l2->num - l1->num; +} + +static void add_list(char *buf, int len) +{ + if (list_size != 0 && + len == list[list_size-1].len && + memcmp(buf, list[list_size-1].txt, len) == 0) { + list[list_size-1].num++; + return; + } + if (list_size == max_size) { + printf("max_size too small??\n"); + exit(1); + } + list[list_size].txt = malloc(len+1); + list[list_size].len = len; + list[list_size].num = 1; + memcpy(list[list_size].txt, buf, len); + list[list_size].txt[len] = 0; + list_size++; + if (list_size % 1000 == 0) { + printf("loaded %d\r", list_size); + fflush(stdout); + } +} + +#define BUF_SIZE 1024 + +int main(int argc, char **argv) +{ + FILE *fin, *fout; + char buf[BUF_SIZE]; + int ret, i, count; + struct block_list *list2; + struct stat st; + + if (argc < 3) { + printf("Usage: ./program \n"); + perror("open: "); + exit(1); + } + + fin = fopen(argv[1], "r"); + fout = fopen(argv[2], "w"); + if (!fin || !fout) { + printf("Usage: ./program \n"); + perror("open: "); + exit(1); + } + + fstat(fileno(fin), &st); + max_size = st.st_size / 100; /* hack ... */ + + list = malloc(max_size * sizeof(*list)); + + for ( ; ; ) { + ret = read_block(buf, BUF_SIZE, fin); + if (ret < 0) + break; + + add_list(buf, ret); + } + + printf("loaded %d\n", list_size); + + printf("sorting ....\n"); + + qsort(list, list_size, sizeof(list[0]), compare_txt); + + list2 = malloc(sizeof(*list) * list_size); + + printf("culling\n"); + + for (i = count = 0; i < list_size; i++) { + if (count == 0 || + strcmp(list2[count-1].txt, list[i].txt) != 0) { + list2[count++] = list[i]; + } else { + list2[count-1].num += list[i].num; + } + } + + qsort(list2, count, sizeof(list[0]), compare_num); + + for (i = 0; i < count; i++) + fprintf(fout, "%d times:\n%s\n", list2[i].num, list2[i].txt); + + return 0; +} -- cgit v1.2.3 From c9b26b81af9c3296685b5dbcbcd415400ab400dc Mon Sep 17 00:00:00 2001 From: David Drysdale Date: Fri, 12 Dec 2014 16:57:36 -0800 Subject: syscalls: add selftest for execveat(2) Signed-off-by: David Drysdale Cc: Meredydd Luff Cc: Shuah Khan Cc: "Eric W. Biederman" Cc: Andy Lutomirski Cc: Alexander Viro Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Kees Cook Cc: Arnd Bergmann Cc: Rich Felker Cc: Christoph Hellwig Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/exec/.gitignore | 9 + tools/testing/selftests/exec/Makefile | 25 ++ tools/testing/selftests/exec/execveat.c | 397 ++++++++++++++++++++++++++++++++ 4 files changed, 432 insertions(+) create mode 100644 tools/testing/selftests/exec/.gitignore create mode 100644 tools/testing/selftests/exec/Makefile create mode 100644 tools/testing/selftests/exec/execveat.c (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 45f145c6f843..c14893b501a9 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -15,6 +15,7 @@ TARGETS += user TARGETS += sysctl TARGETS += firmware TARGETS += ftrace +TARGETS += exec TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore new file mode 100644 index 000000000000..64073e050c6a --- /dev/null +++ b/tools/testing/selftests/exec/.gitignore @@ -0,0 +1,9 @@ +subdir* +script* +execveat +execveat.symlink +execveat.moved +execveat.path.ephemeral +execveat.ephemeral +execveat.denatured +xxxxxxxx* \ No newline at end of file diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile new file mode 100644 index 000000000000..66dfc2ce1788 --- /dev/null +++ b/tools/testing/selftests/exec/Makefile @@ -0,0 +1,25 @@ +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall +BINARIES = execveat +DEPS = execveat.symlink execveat.denatured script subdir +all: $(BINARIES) $(DEPS) + +subdir: + mkdir -p $@ +script: + echo '#!/bin/sh' > $@ + echo 'exit $$*' >> $@ + chmod +x $@ +execveat.symlink: execveat + ln -s -f $< $@ +execveat.denatured: execveat + cp $< $@ + chmod -x $@ +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +run_tests: all + ./execveat + +clean: + rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx* diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c new file mode 100644 index 000000000000..33a5c06d95ca --- /dev/null +++ b/tools/testing/selftests/exec/execveat.c @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2014 Google, Inc. + * + * Licensed under the terms of the GNU GPL License version 2 + * + * Selftests for execveat(2). + */ + +#define _GNU_SOURCE /* to get O_PATH, AT_EMPTY_PATH */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static char longpath[2 * PATH_MAX] = ""; +static char *envp[] = { "IN_TEST=yes", NULL, NULL }; +static char *argv[] = { "execveat", "99", NULL }; + +static int execveat_(int fd, const char *path, char **argv, char **envp, + int flags) +{ +#ifdef __NR_execveat + return syscall(__NR_execveat, fd, path, argv, envp, flags); +#else + errno = -ENOSYS; + return -1; +#endif +} + +#define check_execveat_fail(fd, path, flags, errno) \ + _check_execveat_fail(fd, path, flags, errno, #errno) +static int _check_execveat_fail(int fd, const char *path, int flags, + int expected_errno, const char *errno_str) +{ + int rc; + + errno = 0; + printf("Check failure of execveat(%d, '%s', %d) with %s... ", + fd, path?:"(null)", flags, errno_str); + rc = execveat_(fd, path, argv, envp, flags); + + if (rc > 0) { + printf("[FAIL] (unexpected success from execveat(2))\n"); + return 1; + } + if (errno != expected_errno) { + printf("[FAIL] (expected errno %d (%s) not %d (%s)\n", + expected_errno, strerror(expected_errno), + errno, strerror(errno)); + return 1; + } + printf("[OK]\n"); + return 0; +} + +static int check_execveat_invoked_rc(int fd, const char *path, int flags, + int expected_rc) +{ + int status; + int rc; + pid_t child; + int pathlen = path ? strlen(path) : 0; + + if (pathlen > 40) + printf("Check success of execveat(%d, '%.20s...%s', %d)... ", + fd, path, (path + pathlen - 20), flags); + else + printf("Check success of execveat(%d, '%s', %d)... ", + fd, path?:"(null)", flags); + child = fork(); + if (child < 0) { + printf("[FAIL] (fork() failed)\n"); + return 1; + } + if (child == 0) { + /* Child: do execveat(). */ + rc = execveat_(fd, path, argv, envp, flags); + printf("[FAIL]: execveat() failed, rc=%d errno=%d (%s)\n", + rc, errno, strerror(errno)); + exit(1); /* should not reach here */ + } + /* Parent: wait for & check child's exit status. */ + rc = waitpid(child, &status, 0); + if (rc != child) { + printf("[FAIL] (waitpid(%d,...) returned %d)\n", child, rc); + return 1; + } + if (!WIFEXITED(status)) { + printf("[FAIL] (child %d did not exit cleanly, status=%08x)\n", + child, status); + return 1; + } + if (WEXITSTATUS(status) != expected_rc) { + printf("[FAIL] (child %d exited with %d not %d)\n", + child, WEXITSTATUS(status), expected_rc); + return 1; + } + printf("[OK]\n"); + return 0; +} + +static int check_execveat(int fd, const char *path, int flags) +{ + return check_execveat_invoked_rc(fd, path, flags, 99); +} + +static char *concat(const char *left, const char *right) +{ + char *result = malloc(strlen(left) + strlen(right) + 1); + + strcpy(result, left); + strcat(result, right); + return result; +} + +static int open_or_die(const char *filename, int flags) +{ + int fd = open(filename, flags); + + if (fd < 0) { + printf("Failed to open '%s'; " + "check prerequisites are available\n", filename); + exit(1); + } + return fd; +} + +static void exe_cp(const char *src, const char *dest) +{ + int in_fd = open_or_die(src, O_RDONLY); + int out_fd = open(dest, O_RDWR|O_CREAT|O_TRUNC, 0755); + struct stat info; + + fstat(in_fd, &info); + sendfile(out_fd, in_fd, NULL, info.st_size); + close(in_fd); + close(out_fd); +} + +#define XX_DIR_LEN 200 +static int check_execveat_pathmax(int dot_dfd, const char *src, int is_script) +{ + int fail = 0; + int ii, count, len; + char longname[XX_DIR_LEN + 1]; + int fd; + + if (*longpath == '\0') { + /* Create a filename close to PATH_MAX in length */ + memset(longname, 'x', XX_DIR_LEN - 1); + longname[XX_DIR_LEN - 1] = '/'; + longname[XX_DIR_LEN] = '\0'; + count = (PATH_MAX - 3) / XX_DIR_LEN; + for (ii = 0; ii < count; ii++) { + strcat(longpath, longname); + mkdir(longpath, 0755); + } + len = (PATH_MAX - 3) - (count * XX_DIR_LEN); + if (len <= 0) + len = 1; + memset(longname, 'y', len); + longname[len] = '\0'; + strcat(longpath, longname); + } + exe_cp(src, longpath); + + /* + * Execute as a pre-opened file descriptor, which works whether this is + * a script or not (because the interpreter sees a filename like + * "/dev/fd/20"). + */ + fd = open(longpath, O_RDONLY); + if (fd > 0) { + printf("Invoke copy of '%s' via filename of length %lu:\n", + src, strlen(longpath)); + fail += check_execveat(fd, "", AT_EMPTY_PATH); + } else { + printf("Failed to open length %lu filename, errno=%d (%s)\n", + strlen(longpath), errno, strerror(errno)); + fail++; + } + + /* + * Execute as a long pathname relative to ".". If this is a script, + * the interpreter will launch but fail to open the script because its + * name ("/dev/fd/5/xxx....") is bigger than PATH_MAX. + */ + if (is_script) + fail += check_execveat_invoked_rc(dot_dfd, longpath, 0, 127); + else + fail += check_execveat(dot_dfd, longpath, 0); + + return fail; +} + +static int run_tests(void) +{ + int fail = 0; + char *fullname = realpath("execveat", NULL); + char *fullname_script = realpath("script", NULL); + char *fullname_symlink = concat(fullname, ".symlink"); + int subdir_dfd = open_or_die("subdir", O_DIRECTORY|O_RDONLY); + int subdir_dfd_ephemeral = open_or_die("subdir.ephemeral", + O_DIRECTORY|O_RDONLY); + int dot_dfd = open_or_die(".", O_DIRECTORY|O_RDONLY); + int dot_dfd_path = open_or_die(".", O_DIRECTORY|O_RDONLY|O_PATH); + int dot_dfd_cloexec = open_or_die(".", O_DIRECTORY|O_RDONLY|O_CLOEXEC); + int fd = open_or_die("execveat", O_RDONLY); + int fd_path = open_or_die("execveat", O_RDONLY|O_PATH); + int fd_symlink = open_or_die("execveat.symlink", O_RDONLY); + int fd_denatured = open_or_die("execveat.denatured", O_RDONLY); + int fd_denatured_path = open_or_die("execveat.denatured", + O_RDONLY|O_PATH); + int fd_script = open_or_die("script", O_RDONLY); + int fd_ephemeral = open_or_die("execveat.ephemeral", O_RDONLY); + int fd_ephemeral_path = open_or_die("execveat.path.ephemeral", + O_RDONLY|O_PATH); + int fd_script_ephemeral = open_or_die("script.ephemeral", O_RDONLY); + int fd_cloexec = open_or_die("execveat", O_RDONLY|O_CLOEXEC); + int fd_script_cloexec = open_or_die("script", O_RDONLY|O_CLOEXEC); + + /* Change file position to confirm it doesn't affect anything */ + lseek(fd, 10, SEEK_SET); + + /* Normal executable file: */ + /* dfd + path */ + fail += check_execveat(subdir_dfd, "../execveat", 0); + fail += check_execveat(dot_dfd, "execveat", 0); + fail += check_execveat(dot_dfd_path, "execveat", 0); + /* absolute path */ + fail += check_execveat(AT_FDCWD, fullname, 0); + /* absolute path with nonsense dfd */ + fail += check_execveat(99, fullname, 0); + /* fd + no path */ + fail += check_execveat(fd, "", AT_EMPTY_PATH); + /* O_CLOEXEC fd + no path */ + fail += check_execveat(fd_cloexec, "", AT_EMPTY_PATH); + /* O_PATH fd */ + fail += check_execveat(fd_path, "", AT_EMPTY_PATH); + + /* Mess with executable file that's already open: */ + /* fd + no path to a file that's been renamed */ + rename("execveat.ephemeral", "execveat.moved"); + fail += check_execveat(fd_ephemeral, "", AT_EMPTY_PATH); + /* fd + no path to a file that's been deleted */ + unlink("execveat.moved"); /* remove the file now fd open */ + fail += check_execveat(fd_ephemeral, "", AT_EMPTY_PATH); + + /* Mess with executable file that's already open with O_PATH */ + /* fd + no path to a file that's been deleted */ + unlink("execveat.path.ephemeral"); + fail += check_execveat(fd_ephemeral_path, "", AT_EMPTY_PATH); + + /* Invalid argument failures */ + fail += check_execveat_fail(fd, "", 0, ENOENT); + fail += check_execveat_fail(fd, NULL, AT_EMPTY_PATH, EFAULT); + + /* Symlink to executable file: */ + /* dfd + path */ + fail += check_execveat(dot_dfd, "execveat.symlink", 0); + fail += check_execveat(dot_dfd_path, "execveat.symlink", 0); + /* absolute path */ + fail += check_execveat(AT_FDCWD, fullname_symlink, 0); + /* fd + no path, even with AT_SYMLINK_NOFOLLOW (already followed) */ + fail += check_execveat(fd_symlink, "", AT_EMPTY_PATH); + fail += check_execveat(fd_symlink, "", + AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW); + + /* Symlink fails when AT_SYMLINK_NOFOLLOW set: */ + /* dfd + path */ + fail += check_execveat_fail(dot_dfd, "execveat.symlink", + AT_SYMLINK_NOFOLLOW, ELOOP); + fail += check_execveat_fail(dot_dfd_path, "execveat.symlink", + AT_SYMLINK_NOFOLLOW, ELOOP); + /* absolute path */ + fail += check_execveat_fail(AT_FDCWD, fullname_symlink, + AT_SYMLINK_NOFOLLOW, ELOOP); + + /* Shell script wrapping executable file: */ + /* dfd + path */ + fail += check_execveat(subdir_dfd, "../script", 0); + fail += check_execveat(dot_dfd, "script", 0); + fail += check_execveat(dot_dfd_path, "script", 0); + /* absolute path */ + fail += check_execveat(AT_FDCWD, fullname_script, 0); + /* fd + no path */ + fail += check_execveat(fd_script, "", AT_EMPTY_PATH); + fail += check_execveat(fd_script, "", + AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW); + /* O_CLOEXEC fd fails for a script (as script file inaccessible) */ + fail += check_execveat_fail(fd_script_cloexec, "", AT_EMPTY_PATH, + ENOENT); + fail += check_execveat_fail(dot_dfd_cloexec, "script", 0, ENOENT); + + /* Mess with script file that's already open: */ + /* fd + no path to a file that's been renamed */ + rename("script.ephemeral", "script.moved"); + fail += check_execveat(fd_script_ephemeral, "", AT_EMPTY_PATH); + /* fd + no path to a file that's been deleted */ + unlink("script.moved"); /* remove the file while fd open */ + fail += check_execveat(fd_script_ephemeral, "", AT_EMPTY_PATH); + + /* Rename a subdirectory in the path: */ + rename("subdir.ephemeral", "subdir.moved"); + fail += check_execveat(subdir_dfd_ephemeral, "../script", 0); + fail += check_execveat(subdir_dfd_ephemeral, "script", 0); + /* Remove the subdir and its contents */ + unlink("subdir.moved/script"); + unlink("subdir.moved"); + /* Shell loads via deleted subdir OK because name starts with .. */ + fail += check_execveat(subdir_dfd_ephemeral, "../script", 0); + fail += check_execveat_fail(subdir_dfd_ephemeral, "script", 0, ENOENT); + + /* Flag values other than AT_SYMLINK_NOFOLLOW => EINVAL */ + fail += check_execveat_fail(dot_dfd, "execveat", 0xFFFF, EINVAL); + /* Invalid path => ENOENT */ + fail += check_execveat_fail(dot_dfd, "no-such-file", 0, ENOENT); + fail += check_execveat_fail(dot_dfd_path, "no-such-file", 0, ENOENT); + fail += check_execveat_fail(AT_FDCWD, "no-such-file", 0, ENOENT); + /* Attempt to execute directory => EACCES */ + fail += check_execveat_fail(dot_dfd, "", AT_EMPTY_PATH, EACCES); + /* Attempt to execute non-executable => EACCES */ + fail += check_execveat_fail(dot_dfd, "Makefile", 0, EACCES); + fail += check_execveat_fail(fd_denatured, "", AT_EMPTY_PATH, EACCES); + fail += check_execveat_fail(fd_denatured_path, "", AT_EMPTY_PATH, + EACCES); + /* Attempt to execute nonsense FD => EBADF */ + fail += check_execveat_fail(99, "", AT_EMPTY_PATH, EBADF); + fail += check_execveat_fail(99, "execveat", 0, EBADF); + /* Attempt to execute relative to non-directory => ENOTDIR */ + fail += check_execveat_fail(fd, "execveat", 0, ENOTDIR); + + fail += check_execveat_pathmax(dot_dfd, "execveat", 0); + fail += check_execveat_pathmax(dot_dfd, "script", 1); + return fail; +} + +static void prerequisites(void) +{ + int fd; + const char *script = "#!/bin/sh\nexit $*\n"; + + /* Create ephemeral copies of files */ + exe_cp("execveat", "execveat.ephemeral"); + exe_cp("execveat", "execveat.path.ephemeral"); + exe_cp("script", "script.ephemeral"); + mkdir("subdir.ephemeral", 0755); + + fd = open("subdir.ephemeral/script", O_RDWR|O_CREAT|O_TRUNC, 0755); + write(fd, script, strlen(script)); + close(fd); +} + +int main(int argc, char **argv) +{ + int ii; + int rc; + const char *verbose = getenv("VERBOSE"); + + if (argc >= 2) { + /* If we are invoked with an argument, don't run tests. */ + const char *in_test = getenv("IN_TEST"); + + if (verbose) { + printf(" invoked with:"); + for (ii = 0; ii < argc; ii++) + printf(" [%d]='%s'", ii, argv[ii]); + printf("\n"); + } + + /* Check expected environment transferred. */ + if (!in_test || strcmp(in_test, "yes") != 0) { + printf("[FAIL] (no IN_TEST=yes in env)\n"); + return 1; + } + + /* Use the final argument as an exit code. */ + rc = atoi(argv[argc - 1]); + fflush(stdout); + } else { + prerequisites(); + if (verbose) + envp[1] = "VERBOSE=1"; + rc = run_tests(); + if (rc > 0) + printf("%d tests failed\n", rc); + } + return rc; +} -- cgit v1.2.3