diff options
Diffstat (limited to 'arch/um/os-Linux')
-rw-r--r-- | arch/um/os-Linux/Makefile | 2 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/Makefile | 13 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/etap.h | 21 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/ethertap_kern.c | 100 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/ethertap_user.c | 248 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/tuntap.h | 21 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/tuntap_kern.c | 86 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/tuntap_user.c | 215 | ||||
-rw-r--r-- | arch/um/os-Linux/file.c | 15 | ||||
-rw-r--r-- | arch/um/os-Linux/helper.c | 67 | ||||
-rw-r--r-- | arch/um/os-Linux/internal.h | 5 | ||||
-rw-r--r-- | arch/um/os-Linux/main.c | 8 | ||||
-rw-r--r-- | arch/um/os-Linux/process.c | 82 | ||||
-rw-r--r-- | arch/um/os-Linux/registers.c | 4 | ||||
-rw-r--r-- | arch/um/os-Linux/sigio.c | 353 | ||||
-rw-r--r-- | arch/um/os-Linux/signal.c | 23 | ||||
-rw-r--r-- | arch/um/os-Linux/skas/mem.c | 103 | ||||
-rw-r--r-- | arch/um/os-Linux/skas/process.c | 498 | ||||
-rw-r--r-- | arch/um/os-Linux/start_up.c | 195 |
19 files changed, 825 insertions, 1234 deletions
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 049dfa5bc9c6..fae836713487 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -8,7 +8,7 @@ KCOV_INSTRUMENT := n obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ registers.o sigio.o signal.o start_up.o time.o tty.o \ - umid.o user_syms.o util.o drivers/ skas/ + umid.o user_syms.o util.o skas/ CFLAGS_signal.o += -Wframe-larger-than=4096 diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile deleted file mode 100644 index cf2d75bb1884..000000000000 --- a/arch/um/os-Linux/drivers/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -# - -ethertap-objs := ethertap_kern.o ethertap_user.o -tuntap-objs := tuntap_kern.o tuntap_user.o - -obj-y = -obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o -obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o - -include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h deleted file mode 100644 index a475259f90e1..000000000000 --- a/arch/um/os-Linux/drivers/etap.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __DRIVERS_ETAP_H -#define __DRIVERS_ETAP_H - -#include <net_user.h> - -struct ethertap_data { - char *dev_name; - char *gate_addr; - int data_fd; - int control_fd; - void *dev; -}; - -extern const struct net_user_info ethertap_user_info; - -#endif diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c deleted file mode 100644 index 5e5ee40680ce..000000000000 --- a/arch/um/os-Linux/drivers/ethertap_kern.c +++ /dev/null @@ -1,100 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 by various other people who didn't put their name here. - */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include "etap.h" -#include <net_kern.h> - -struct ethertap_init { - char *dev_name; - char *gate_addr; -}; - -static void etap_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct ethertap_data *epri; - struct ethertap_init *init = data; - - pri = netdev_priv(dev); - epri = (struct ethertap_data *) pri->user; - epri->dev_name = init->dev_name; - epri->gate_addr = init->gate_addr; - epri->data_fd = -1; - epri->control_fd = -1; - epri->dev = dev; - - printk(KERN_INFO "ethertap backend - %s", epri->dev_name); - if (epri->gate_addr != NULL) - printk(KERN_CONT ", IP = %s", epri->gate_addr); - printk(KERN_CONT "\n"); -} - -static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - int len; - - len = net_recvfrom(fd, skb_mac_header(skb), - skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP); - if (len <= 0) - return(len); - - skb_pull(skb, 2); - len -= 2; - return len; -} - -static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - skb_push(skb, 2); - return net_send(fd, skb->data, skb->len); -} - -const struct net_kern_info ethertap_kern_info = { - .init = etap_init, - .protocol = eth_protocol, - .read = etap_read, - .write = etap_write, -}; - -static int ethertap_setup(char *str, char **mac_out, void *data) -{ - struct ethertap_init *init = data; - - *init = ((struct ethertap_init) - { .dev_name = NULL, - .gate_addr = NULL }); - if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out, - &init->gate_addr)) - return 0; - if (init->dev_name == NULL) { - printk(KERN_ERR "ethertap_setup : Missing tap device name\n"); - return 0; - } - - return 1; -} - -static struct transport ethertap_transport = { - .list = LIST_HEAD_INIT(ethertap_transport.list), - .name = "ethertap", - .setup = ethertap_setup, - .user = ðertap_user_info, - .kern = ðertap_kern_info, - .private_size = sizeof(struct ethertap_data), - .setup_size = sizeof(struct ethertap_init), -}; - -static int register_ethertap(void) -{ - register_transport(ðertap_transport); - return 0; -} - -late_initcall(register_ethertap); diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c deleted file mode 100644 index bdf215c0eca7..000000000000 --- a/arch/um/os-Linux/drivers/ethertap_user.c +++ /dev/null @@ -1,248 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - */ - -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <sys/socket.h> -#include <sys/wait.h> -#include "etap.h" -#include <os.h> -#include <net_user.h> -#include <um_malloc.h> - -#define MAX_PACKET ETH_MAX_PACKET - -static int etap_user_init(void *data, void *dev) -{ - struct ethertap_data *pri = data; - - pri->dev = dev; - return 0; -} - -struct addr_change { - enum { ADD_ADDR, DEL_ADDR } what; - unsigned char addr[4]; - unsigned char netmask[4]; -}; - -static void etap_change(int op, unsigned char *addr, unsigned char *netmask, - int fd) -{ - struct addr_change change; - char *output; - int n; - - change.what = op; - memcpy(change.addr, addr, sizeof(change.addr)); - memcpy(change.netmask, netmask, sizeof(change.netmask)); - CATCH_EINTR(n = write(fd, &change, sizeof(change))); - if (n != sizeof(change)) { - printk(UM_KERN_ERR "etap_change - request failed, err = %d\n", - errno); - return; - } - - output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL); - if (output == NULL) - printk(UM_KERN_ERR "etap_change : Failed to allocate output " - "buffer\n"); - read_output(fd, output, UM_KERN_PAGE_SIZE); - if (output != NULL) { - printk("%s", output); - kfree(output); - } -} - -static void etap_open_addr(unsigned char *addr, unsigned char *netmask, - void *arg) -{ - etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); -} - -static void etap_close_addr(unsigned char *addr, unsigned char *netmask, - void *arg) -{ - etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); -} - -struct etap_pre_exec_data { - int control_remote; - int control_me; - int data_me; -}; - -static void etap_pre_exec(void *arg) -{ - struct etap_pre_exec_data *data = arg; - - dup2(data->control_remote, 1); - close(data->data_me); - close(data->control_me); -} - -static int etap_tramp(char *dev, char *gate, int control_me, - int control_remote, int data_me, int data_remote) -{ - struct etap_pre_exec_data pe_data; - int pid, err, n; - char version_buf[sizeof("nnnnn\0")]; - char data_fd_buf[sizeof("nnnnnn\0")]; - char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; - char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, - data_fd_buf, gate_buf, NULL }; - char *nosetup_args[] = { "uml_net", version_buf, "ethertap", - dev, data_fd_buf, NULL }; - char **args, c; - - sprintf(data_fd_buf, "%d", data_remote); - sprintf(version_buf, "%d", UML_NET_VERSION); - if (gate != NULL) { - strscpy(gate_buf, gate); - args = setup_args; - } - else args = nosetup_args; - - err = 0; - pe_data.control_remote = control_remote; - pe_data.control_me = control_me; - pe_data.data_me = data_me; - pid = run_helper(etap_pre_exec, &pe_data, args); - - if (pid < 0) - err = pid; - close(data_remote); - close(control_remote); - CATCH_EINTR(n = read(control_me, &c, sizeof(c))); - if (n != sizeof(c)) { - err = -errno; - printk(UM_KERN_ERR "etap_tramp : read of status failed, " - "err = %d\n", -err); - return err; - } - if (c != 1) { - printk(UM_KERN_ERR "etap_tramp : uml_net failed\n"); - err = helper_wait(pid); - } - return err; -} - -static int etap_open(void *data) -{ - struct ethertap_data *pri = data; - char *output; - int data_fds[2], control_fds[2], err, output_len; - - err = tap_open_common(pri->dev, pri->gate_addr); - if (err) - return err; - - err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds); - if (err) { - err = -errno; - printk(UM_KERN_ERR "etap_open - data socketpair failed - " - "err = %d\n", errno); - return err; - } - - err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds); - if (err) { - err = -errno; - printk(UM_KERN_ERR "etap_open - control socketpair failed - " - "err = %d\n", errno); - goto out_close_data; - } - - err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], - control_fds[1], data_fds[0], data_fds[1]); - output_len = UM_KERN_PAGE_SIZE; - output = uml_kmalloc(output_len, UM_GFP_KERNEL); - read_output(control_fds[0], output, output_len); - - if (output == NULL) - printk(UM_KERN_ERR "etap_open : failed to allocate output " - "buffer\n"); - else { - printk("%s", output); - kfree(output); - } - - if (err < 0) { - printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err); - goto out_close_control; - } - - pri->data_fd = data_fds[0]; - pri->control_fd = control_fds[0]; - iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); - return data_fds[0]; - -out_close_control: - close(control_fds[0]); - close(control_fds[1]); -out_close_data: - close(data_fds[0]); - close(data_fds[1]); - return err; -} - -static void etap_close(int fd, void *data) -{ - struct ethertap_data *pri = data; - - iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); - close(fd); - - if (shutdown(pri->data_fd, SHUT_RDWR) < 0) - printk(UM_KERN_ERR "etap_close - shutdown data socket failed, " - "errno = %d\n", errno); - - if (shutdown(pri->control_fd, SHUT_RDWR) < 0) - printk(UM_KERN_ERR "etap_close - shutdown control socket " - "failed, errno = %d\n", errno); - - close(pri->data_fd); - pri->data_fd = -1; - close(pri->control_fd); - pri->control_fd = -1; -} - -static void etap_add_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct ethertap_data *pri = data; - - tap_check_ips(pri->gate_addr, addr); - if (pri->control_fd == -1) - return; - etap_open_addr(addr, netmask, &pri->control_fd); -} - -static void etap_del_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct ethertap_data *pri = data; - - if (pri->control_fd == -1) - return; - - etap_close_addr(addr, netmask, &pri->control_fd); -} - -const struct net_user_info ethertap_user_info = { - .init = etap_user_init, - .open = etap_open, - .close = etap_close, - .remove = NULL, - .add_address = etap_add_addr, - .delete_address = etap_del_addr, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP, -}; diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h deleted file mode 100644 index e364e42abfc5..000000000000 --- a/arch/um/os-Linux/drivers/tuntap.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __UM_TUNTAP_H -#define __UM_TUNTAP_H - -#include <net_user.h> - -struct tuntap_data { - char *dev_name; - int fixed_config; - char *gate_addr; - int fd; - void *dev; -}; - -extern const struct net_user_info tuntap_user_info; - -#endif diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c deleted file mode 100644 index ff022d9cf0dd..000000000000 --- a/arch/um/os-Linux/drivers/tuntap_kern.c +++ /dev/null @@ -1,86 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <linux/netdevice.h> -#include <linux/init.h> -#include <linux/skbuff.h> -#include <asm/errno.h> -#include <net_kern.h> -#include "tuntap.h" - -struct tuntap_init { - char *dev_name; - char *gate_addr; -}; - -static void tuntap_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct tuntap_data *tpri; - struct tuntap_init *init = data; - - pri = netdev_priv(dev); - tpri = (struct tuntap_data *) pri->user; - tpri->dev_name = init->dev_name; - tpri->fixed_config = (init->dev_name != NULL); - tpri->gate_addr = init->gate_addr; - tpri->fd = -1; - tpri->dev = dev; - - printk(KERN_INFO "TUN/TAP backend - "); - if (tpri->gate_addr != NULL) - printk(KERN_CONT "IP = %s", tpri->gate_addr); - printk(KERN_CONT "\n"); -} - -static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_read(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER); -} - -static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_write(fd, skb->data, skb->len); -} - -const struct net_kern_info tuntap_kern_info = { - .init = tuntap_init, - .protocol = eth_protocol, - .read = tuntap_read, - .write = tuntap_write, -}; - -static int tuntap_setup(char *str, char **mac_out, void *data) -{ - struct tuntap_init *init = data; - - *init = ((struct tuntap_init) - { .dev_name = NULL, - .gate_addr = NULL }); - if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out, - &init->gate_addr)) - return 0; - - return 1; -} - -static struct transport tuntap_transport = { - .list = LIST_HEAD_INIT(tuntap_transport.list), - .name = "tuntap", - .setup = tuntap_setup, - .user = &tuntap_user_info, - .kern = &tuntap_kern_info, - .private_size = sizeof(struct tuntap_data), - .setup_size = sizeof(struct tuntap_init), -}; - -static int register_tuntap(void) -{ - register_transport(&tuntap_transport); - return 0; -} - -late_initcall(register_tuntap); diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c deleted file mode 100644 index 91f0e27ca3a6..000000000000 --- a/arch/um/os-Linux/drivers/tuntap_user.c +++ /dev/null @@ -1,215 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <linux/if_tun.h> -#include <net/if.h> -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/wait.h> -#include <sys/uio.h> -#include <kern_util.h> -#include <os.h> -#include "tuntap.h" - -static int tuntap_user_init(void *data, void *dev) -{ - struct tuntap_data *pri = data; - - pri->dev = dev; - return 0; -} - -static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct tuntap_data *pri = data; - - tap_check_ips(pri->gate_addr, addr); - if ((pri->fd == -1) || pri->fixed_config) - return; - open_addr(addr, netmask, pri->dev_name); -} - -static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct tuntap_data *pri = data; - - if ((pri->fd == -1) || pri->fixed_config) - return; - close_addr(addr, netmask, pri->dev_name); -} - -struct tuntap_pre_exec_data { - int stdout_fd; - int close_me; -}; - -static void tuntap_pre_exec(void *arg) -{ - struct tuntap_pre_exec_data *data = arg; - - dup2(data->stdout_fd, 1); - close(data->close_me); -} - -static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, - char *buffer, int buffer_len, int *used_out) -{ - struct tuntap_pre_exec_data data; - char version_buf[sizeof("nnnnn\0")]; - char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate, - NULL }; - char buf[CMSG_SPACE(sizeof(*fd_out))]; - struct msghdr msg; - struct cmsghdr *cmsg; - struct iovec iov; - int pid, n, err; - - sprintf(version_buf, "%d", UML_NET_VERSION); - - data.stdout_fd = remote; - data.close_me = me; - - pid = run_helper(tuntap_pre_exec, &data, argv); - - if (pid < 0) - return pid; - - close(remote); - - msg.msg_name = NULL; - msg.msg_namelen = 0; - if (buffer != NULL) { - iov = ((struct iovec) { buffer, buffer_len }); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - } - else { - msg.msg_iov = NULL; - msg.msg_iovlen = 0; - } - msg.msg_control = buf; - msg.msg_controllen = sizeof(buf); - msg.msg_flags = 0; - n = recvmsg(me, &msg, 0); - *used_out = n; - if (n < 0) { - err = -errno; - printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - " - "errno = %d\n", errno); - return err; - } - helper_wait(pid); - - cmsg = CMSG_FIRSTHDR(&msg); - if (cmsg == NULL) { - printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " - "message\n"); - return -EINVAL; - } - if ((cmsg->cmsg_level != SOL_SOCKET) || - (cmsg->cmsg_type != SCM_RIGHTS)) { - printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " - "descriptor\n"); - return -EINVAL; - } - *fd_out = ((int *) CMSG_DATA(cmsg))[0]; - os_set_exec_close(*fd_out); - return 0; -} - -static int tuntap_open(void *data) -{ - struct ifreq ifr; - struct tuntap_data *pri = data; - char *output, *buffer; - int err, fds[2], len, used; - - err = tap_open_common(pri->dev, pri->gate_addr); - if (err < 0) - return err; - - if (pri->fixed_config) { - pri->fd = os_open_file("/dev/net/tun", - of_cloexec(of_rdwr(OPENFLAGS())), 0); - if (pri->fd < 0) { - printk(UM_KERN_ERR "Failed to open /dev/net/tun, " - "err = %d\n", -pri->fd); - return pri->fd; - } - memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strscpy(ifr.ifr_name, pri->dev_name); - if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) { - err = -errno; - printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n", - errno); - close(pri->fd); - return err; - } - } - else { - err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds); - if (err) { - err = -errno; - printk(UM_KERN_ERR "tuntap_open : socketpair failed - " - "errno = %d\n", errno); - return err; - } - - buffer = get_output_buffer(&len); - if (buffer != NULL) - len--; - used = 0; - - err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], - fds[1], buffer, len, &used); - - output = buffer; - if (err < 0) { - printk("%s", output); - free_output_buffer(buffer); - printk(UM_KERN_ERR "tuntap_open_tramp failed - " - "err = %d\n", -err); - return err; - } - - pri->dev_name = uml_strdup(buffer); - output += IFNAMSIZ; - printk("%s", output); - free_output_buffer(buffer); - - close(fds[0]); - iter_addresses(pri->dev, open_addr, pri->dev_name); - } - - return pri->fd; -} - -static void tuntap_close(int fd, void *data) -{ - struct tuntap_data *pri = data; - - if (!pri->fixed_config) - iter_addresses(pri->dev, close_addr, pri->dev_name); - close(fd); - pri->fd = -1; -} - -const struct net_user_info tuntap_user_info = { - .init = tuntap_user_init, - .open = tuntap_open, - .close = tuntap_close, - .remove = NULL, - .add_address = tuntap_add_addr, - .delete_address = tuntap_del_addr, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index a0d01c68ce3e..617886d1fb1e 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -106,21 +106,6 @@ int os_get_ifname(int fd, char* namebuf) return 0; } -int os_set_slip(int fd) -{ - int disc, sencap; - - disc = N_SLIP; - if (ioctl(fd, TIOCSETD, &disc) < 0) - return -errno; - - sencap = 0; - if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0) - return -errno; - - return 0; -} - int os_mode_fd(int fd, int mode) { int err; diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 3cb8ac63be6e..89c2ad2a4e3a 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -8,6 +8,7 @@ #include <unistd.h> #include <errno.h> #include <sched.h> +#include <pthread.h> #include <linux/limits.h> #include <sys/socket.h> #include <sys/wait.h> @@ -121,6 +122,10 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long stack, sp; int pid, status, err; + /* To share memory space, use os_run_helper_thread() instead. */ + if (flags & CLONE_VM) + return -EINVAL; + stack = alloc_stack(0, __uml_cant_sleep()); if (stack == 0) return -ENOMEM; @@ -167,3 +172,65 @@ int helper_wait(int pid) } else return 0; } + +struct os_helper_thread { + pthread_t handle; +}; + +int os_run_helper_thread(struct os_helper_thread **td_out, + void *(*routine)(void *), void *arg) +{ + struct os_helper_thread *td; + sigset_t sigset, oset; + int err, flags; + + flags = __uml_cant_sleep() ? UM_GFP_ATOMIC : UM_GFP_KERNEL; + td = uml_kmalloc(sizeof(*td), flags); + if (!td) + return -ENOMEM; + + sigfillset(&sigset); + if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) { + err = -errno; + kfree(td); + return err; + } + + err = pthread_create(&td->handle, NULL, routine, arg); + + if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0) + panic("Failed to restore the signal mask: %d", errno); + + if (err != 0) + kfree(td); + else + *td_out = td; + + return -err; +} + +void os_kill_helper_thread(struct os_helper_thread *td) +{ + pthread_cancel(td->handle); + pthread_join(td->handle, NULL); + kfree(td); +} + +void os_fix_helper_thread_signals(void) +{ + sigset_t sigset; + + sigemptyset(&sigset); + + sigaddset(&sigset, SIGWINCH); + sigaddset(&sigset, SIGPIPE); + sigaddset(&sigset, SIGPROF); + sigaddset(&sigset, SIGINT); + sigaddset(&sigset, SIGTERM); + sigaddset(&sigset, SIGCHLD); + sigaddset(&sigset, SIGALRM); + sigaddset(&sigset, SIGIO); + sigaddset(&sigset, SIGUSR1); + + pthread_sigmask(SIG_SETMASK, &sigset, NULL); +} diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h index 317fca190c2b..5d8d3b0817a9 100644 --- a/arch/um/os-Linux/internal.h +++ b/arch/um/os-Linux/internal.h @@ -2,6 +2,9 @@ #ifndef __UM_OS_LINUX_INTERNAL_H #define __UM_OS_LINUX_INTERNAL_H +#include <mm_id.h> +#include <stub-data.h> + /* * elf_aux.c */ @@ -16,5 +19,5 @@ void check_tmpexec(void); * skas/process.c */ void wait_stub_done(int pid); - +void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys); #endif /* __UM_OS_LINUX_INTERNAL_H */ diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 0afcdeb8995b..3c63ce19e3bf 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -19,13 +19,11 @@ #include <um_malloc.h> #include "internal.h" -#define PGD_BOUND (4 * 1024 * 1024) #define STACKSIZE (8 * 1024 * 1024) -#define THREAD_NAME_LEN (256) long elf_aux_hwcap; -static void set_stklim(void) +static void __init set_stklim(void) { struct rlimit lim; @@ -48,7 +46,7 @@ static void last_ditch_exit(int sig) exit(1); } -static void install_fatal_handler(int sig) +static void __init install_fatal_handler(int sig) { struct sigaction action; @@ -73,7 +71,7 @@ static void install_fatal_handler(int sig) #define UML_LIB_PATH ":" OS_LIB_PATH "/uml" -static void setup_env_path(void) +static void __init setup_env_path(void) { char *new_path = NULL; char *old_path = NULL; diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 9f086f939420..00b49e90d05f 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -18,17 +18,29 @@ #include <init.h> #include <longjmp.h> #include <os.h> +#include <skas/skas.h> void os_alarm_process(int pid) { + if (pid <= 0) + return; + kill(pid, SIGALRM); } void os_kill_process(int pid, int reap_child) { + if (pid <= 0) + return; + + /* Block signals until child is reaped */ + block_signals(); + kill(pid, SIGKILL); if (reap_child) CATCH_EINTR(waitpid(pid, NULL, __WALL)); + + unblock_signals(); } /* Kill off a ptraced child by all means available. kill it normally first, @@ -38,11 +50,27 @@ void os_kill_process(int pid, int reap_child) void os_kill_ptraced_process(int pid, int reap_child) { + if (pid <= 0) + return; + + /* Block signals until child is reaped */ + block_signals(); + kill(pid, SIGKILL); ptrace(PTRACE_KILL, pid); ptrace(PTRACE_CONT, pid); if (reap_child) CATCH_EINTR(waitpid(pid, NULL, __WALL)); + + unblock_signals(); +} + +pid_t os_reap_child(void) +{ + int status; + + /* Try to reap a child */ + return waitpid(-1, &status, WNOHANG); } /* Don't use the glibc version, which caches the result in TLS. It misses some @@ -142,57 +170,6 @@ out: return ok; } -static int os_page_mincore(void *addr) -{ - char vec[2]; - int ret; - - ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); - if (ret < 0) { - if (errno == ENOMEM || errno == EINVAL) - return 0; - else - return -errno; - } - - return vec[0] & 1; -} - -int os_mincore(void *addr, unsigned long len) -{ - char *vec; - int ret, i; - - if (len <= UM_KERN_PAGE_SIZE) - return os_page_mincore(addr); - - vec = calloc(1, (len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); - if (!vec) - return -ENOMEM; - - ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); - if (ret < 0) { - if (errno == ENOMEM || errno == EINVAL) - ret = 0; - else - ret = -errno; - - goto out; - } - - for (i = 0; i < ((len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); i++) { - if (!(vec[i] & 1)) { - ret = 0; - goto out; - } - } - - ret = 1; -out: - free(vec); - return ret; -} - void init_new_thread_signals(void) { set_handler(SIGSEGV); @@ -202,6 +179,9 @@ void init_new_thread_signals(void) set_handler(SIGBUS); signal(SIGHUP, SIG_IGN); set_handler(SIGIO); + /* We (currently) only use the child reaper IRQ in seccomp mode */ + if (using_seccomp) + set_handler(SIGCHLD); signal(SIGWINCH, SIG_IGN); } diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index d7ca148807b2..bfba2cbc9478 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -14,8 +14,8 @@ /* This is set once at boot time and not changed thereafter */ -static unsigned long exec_regs[MAX_REG_NR]; -static unsigned long *exec_fp_regs; +unsigned long exec_regs[MAX_REG_NR]; +unsigned long *exec_fp_regs; int init_pid_registers(int pid) { diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 9aac8def4d63..6de145f8fe3d 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -11,6 +11,8 @@ #include <sched.h> #include <signal.h> #include <string.h> +#include <sys/epoll.h> +#include <asm/unistd.h> #include <kern_util.h> #include <init.h> #include <os.h> @@ -21,184 +23,51 @@ * Protected by sigio_lock(), also used by sigio_cleanup, which is an * exitcall. */ -static int write_sigio_pid = -1; -static unsigned long write_sigio_stack; +static struct os_helper_thread *write_sigio_td; -/* - * These arrays are initialized before the sigio thread is started, and - * the descriptors closed after it is killed. So, it can't see them change. - * On the UML side, they are changed under the sigio_lock. - */ -#define SIGIO_FDS_INIT {-1, -1} - -static int write_sigio_fds[2] = SIGIO_FDS_INIT; -static int sigio_private[2] = SIGIO_FDS_INIT; +static int epollfd = -1; -struct pollfds { - struct pollfd *poll; - int size; - int used; -}; +#define MAX_EPOLL_EVENTS 64 -/* - * Protected by sigio_lock(). Used by the sigio thread, but the UML thread - * synchronizes with it. - */ -static struct pollfds current_poll; -static struct pollfds next_poll; -static struct pollfds all_sigio_fds; +static struct epoll_event epoll_events[MAX_EPOLL_EVENTS]; -static int write_sigio_thread(void *unused) +static void *write_sigio_thread(void *unused) { - struct pollfds *fds, tmp; - struct pollfd *p; - int i, n, respond_fd; - char c; - - os_set_pdeathsig(); - os_fix_helper_signals(); - fds = ¤t_poll; + int pid = getpid(); + int r; + + os_fix_helper_thread_signals(); + while (1) { - n = poll(fds->poll, fds->used, -1); - if (n < 0) { + r = epoll_wait(epollfd, epoll_events, MAX_EPOLL_EVENTS, -1); + if (r < 0) { if (errno == EINTR) continue; - printk(UM_KERN_ERR "write_sigio_thread : poll returned " - "%d, errno = %d\n", n, errno); + printk(UM_KERN_ERR "%s: epoll_wait failed, errno = %d\n", + __func__, errno); } - for (i = 0; i < fds->used; i++) { - p = &fds->poll[i]; - if (p->revents == 0) - continue; - if (p->fd == sigio_private[1]) { - CATCH_EINTR(n = read(sigio_private[1], &c, - sizeof(c))); - if (n != sizeof(c)) - printk(UM_KERN_ERR - "write_sigio_thread : " - "read on socket failed, " - "err = %d\n", errno); - tmp = current_poll; - current_poll = next_poll; - next_poll = tmp; - respond_fd = sigio_private[1]; - } - else { - respond_fd = write_sigio_fds[1]; - fds->used--; - memmove(&fds->poll[i], &fds->poll[i + 1], - (fds->used - i) * sizeof(*fds->poll)); - } - - CATCH_EINTR(n = write(respond_fd, &c, sizeof(c))); - if (n != sizeof(c)) - printk(UM_KERN_ERR "write_sigio_thread : " - "write on socket failed, err = %d\n", - errno); - } - } - return 0; -} - -static int need_poll(struct pollfds *polls, int n) -{ - struct pollfd *new; - - if (n <= polls->size) - return 0; - - new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC); - if (new == NULL) { - printk(UM_KERN_ERR "need_poll : failed to allocate new " - "pollfds\n"); - return -ENOMEM; + CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO)); + if (r < 0) + printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n", + __func__, errno); } - memcpy(new, polls->poll, polls->used * sizeof(struct pollfd)); - kfree(polls->poll); - - polls->poll = new; - polls->size = n; - return 0; -} - -/* - * Must be called with sigio_lock held, because it's needed by the marked - * critical section. - */ -static void update_thread(void) -{ - unsigned long flags; - int n; - char c; - - flags = um_set_signals_trace(0); - CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); - if (n != sizeof(c)) { - printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", - errno); - goto fail; - } - - CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c))); - if (n != sizeof(c)) { - printk(UM_KERN_ERR "update_thread : read failed, err = %d\n", - errno); - goto fail; - } - - um_set_signals_trace(flags); - return; - fail: - /* Critical section start */ - if (write_sigio_pid != -1) { - os_kill_process(write_sigio_pid, 1); - free_stack(write_sigio_stack, 0); - } - write_sigio_pid = -1; - close(sigio_private[0]); - close(sigio_private[1]); - close(write_sigio_fds[0]); - close(write_sigio_fds[1]); - /* Critical section end */ - um_set_signals_trace(flags); + return NULL; } int __add_sigio_fd(int fd) { - struct pollfd *p; - int err, i, n; - - for (i = 0; i < all_sigio_fds.used; i++) { - if (all_sigio_fds.poll[i].fd == fd) - break; - } - if (i == all_sigio_fds.used) - return -ENOSPC; - - p = &all_sigio_fds.poll[i]; - - for (i = 0; i < current_poll.used; i++) { - if (current_poll.poll[i].fd == fd) - return 0; - } - - n = current_poll.used; - err = need_poll(&next_poll, n + 1); - if (err) - return err; - - memcpy(next_poll.poll, current_poll.poll, - current_poll.used * sizeof(struct pollfd)); - next_poll.poll[n] = *p; - next_poll.used = n + 1; - update_thread(); - - return 0; + struct epoll_event event = { + .data.fd = fd, + .events = EPOLLIN | EPOLLET, + }; + int r; + + CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event)); + return r < 0 ? -errno : 0; } - int add_sigio_fd(int fd) { int err; @@ -212,38 +81,11 @@ int add_sigio_fd(int fd) int __ignore_sigio_fd(int fd) { - struct pollfd *p; - int err, i, n = 0; - - /* - * This is called from exitcalls elsewhere in UML - if - * sigio_cleanup has already run, then update_thread will hang - * or fail because the thread is no longer running. - */ - if (write_sigio_pid == -1) - return -EIO; - - for (i = 0; i < current_poll.used; i++) { - if (current_poll.poll[i].fd == fd) - break; - } - if (i == current_poll.used) - return -ENOENT; - - err = need_poll(&next_poll, current_poll.used - 1); - if (err) - return err; - - for (i = 0; i < current_poll.used; i++) { - p = ¤t_poll.poll[i]; - if (p->fd != fd) - next_poll.poll[n++] = *p; - } - next_poll.used = current_poll.used - 1; - - update_thread(); + struct epoll_event event; + int r; - return 0; + CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event)); + return r < 0 ? -errno : 0; } int ignore_sigio_fd(int fd) @@ -257,125 +99,37 @@ int ignore_sigio_fd(int fd) return err; } -static struct pollfd *setup_initial_poll(int fd) -{ - struct pollfd *p; - - p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL); - if (p == NULL) { - printk(UM_KERN_ERR "setup_initial_poll : failed to allocate " - "poll\n"); - return NULL; - } - *p = ((struct pollfd) { .fd = fd, - .events = POLLIN, - .revents = 0 }); - return p; -} - static void write_sigio_workaround(void) { - struct pollfd *p; int err; - int l_write_sigio_fds[2]; - int l_sigio_private[2]; - int l_write_sigio_pid; - /* We call this *tons* of times - and most ones we must just fail. */ sigio_lock(); - l_write_sigio_pid = write_sigio_pid; - sigio_unlock(); - - if (l_write_sigio_pid != -1) - return; + if (write_sigio_td) + goto out; - err = os_pipe(l_write_sigio_fds, 1, 1); - if (err < 0) { - printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, " - "err = %d\n", -err); - return; + epollfd = epoll_create(MAX_EPOLL_EVENTS); + if (epollfd < 0) { + printk(UM_KERN_ERR "%s: epoll_create failed, errno = %d\n", + __func__, errno); + goto out; } - err = os_pipe(l_sigio_private, 1, 1); + + err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL); if (err < 0) { - printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, " - "err = %d\n", -err); - goto out_close1; + printk(UM_KERN_ERR "%s: os_run_helper_thread failed, errno = %d\n", + __func__, -err); + close(epollfd); + epollfd = -1; + goto out; } - p = setup_initial_poll(l_sigio_private[1]); - if (!p) - goto out_close2; - - sigio_lock(); - - /* - * Did we race? Don't try to optimize this, please, it's not so likely - * to happen, and no more than once at the boot. - */ - if (write_sigio_pid != -1) - goto out_free; - - current_poll = ((struct pollfds) { .poll = p, - .used = 1, - .size = 1 }); - - if (write_sigio_irq(l_write_sigio_fds[0])) - goto out_clear_poll; - - memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); - memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); - - write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, - CLONE_FILES | CLONE_VM, - &write_sigio_stack); - - if (write_sigio_pid < 0) - goto out_clear; - - sigio_unlock(); - return; - -out_clear: - write_sigio_pid = -1; - write_sigio_fds[0] = -1; - write_sigio_fds[1] = -1; - sigio_private[0] = -1; - sigio_private[1] = -1; -out_clear_poll: - current_poll = ((struct pollfds) { .poll = NULL, - .size = 0, - .used = 0 }); -out_free: +out: sigio_unlock(); - kfree(p); -out_close2: - close(l_sigio_private[0]); - close(l_sigio_private[1]); -out_close1: - close(l_write_sigio_fds[0]); - close(l_write_sigio_fds[1]); } -void sigio_broken(int fd) +void sigio_broken(void) { - int err; - write_sigio_workaround(); - - sigio_lock(); - err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); - if (err) { - printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd " - "for descriptor %d\n", fd); - goto out; - } - - all_sigio_fds.poll[all_sigio_fds.used++] = - ((struct pollfd) { .fd = fd, - .events = POLLIN, - .revents = 0 }); -out: - sigio_unlock(); } /* Changed during early boot */ @@ -389,17 +143,16 @@ void maybe_sigio_broken(int fd) if (pty_output_sigio) return; - sigio_broken(fd); + sigio_broken(); } static void sigio_cleanup(void) { - if (write_sigio_pid == -1) + if (!write_sigio_td) return; - os_kill_process(write_sigio_pid, 1); - free_stack(write_sigio_stack, 0); - write_sigio_pid = -1; + os_kill_helper_thread(write_sigio_td); + write_sigio_td = NULL; } __uml_exitcall(sigio_cleanup); diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 9ea7269ffb77..11f07f498270 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -21,7 +21,7 @@ #include <sys/ucontext.h> #include <timetravel.h> -void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { +void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) = { [SIGTRAP] = relay_signal, [SIGFPE] = relay_signal, [SIGILL] = relay_signal, @@ -29,6 +29,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGBUS] = relay_signal, [SIGSEGV] = segv_handler, [SIGIO] = sigio_handler, + [SIGCHLD] = sigchld_handler, }; static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) @@ -44,10 +45,10 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) } /* enable signals if sig isn't IRQ signal */ - if ((sig != SIGIO) && (sig != SIGWINCH)) + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD)) unblock_signals_trace(); - (*sig_info[sig])(sig, si, &r); + (*sig_info[sig])(sig, si, &r, mc); errno = save_errno; } @@ -64,6 +65,9 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGALRM_BIT 1 #define SIGALRM_MASK (1 << SIGALRM_BIT) +#define SIGCHLD_BIT 2 +#define SIGCHLD_MASK (1 << SIGCHLD_BIT) + int signals_enabled; #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; @@ -102,6 +106,11 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) return; } + if (!enabled && (sig == SIGCHLD)) { + signals_pending |= SIGCHLD_MASK; + return; + } + block_signals_trace(); sig_handler_common(sig, si, mc); @@ -181,6 +190,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { [SIGIO] = sig_handler, [SIGWINCH] = sig_handler, + /* SIGCHLD is only actually registered in seccomp mode. */ + [SIGCHLD] = sig_handler, [SIGALRM] = timer_alarm_handler, [SIGUSR1] = sigusr1_handler, @@ -309,6 +320,12 @@ void unblock_signals(void) if (save_pending & SIGIO_MASK) sig_handler_common(SIGIO, NULL, NULL); + if (save_pending & SIGCHLD_MASK) { + struct uml_pt_regs regs = {}; + + sigchld_handler(SIGCHLD, NULL, ®s, NULL); + } + /* Do not reenter the handler */ if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK))) diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index d7f1814b0e5a..8b9921ac3ef8 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -43,6 +43,16 @@ void syscall_stub_dump_error(struct mm_id *mm_idp) print_hex_dump(UM_KERN_ERR, " syscall data: ", 0, 16, 4, sc, sizeof(*sc), 0); + + if (using_seccomp) { + printk(UM_KERN_ERR "%s: FD map num: %d", __func__, + mm_idp->syscall_fd_num); + print_hex_dump(UM_KERN_ERR, + " FD map: ", 0, 16, + sizeof(mm_idp->syscall_fd_map[0]), + mm_idp->syscall_fd_map, + sizeof(mm_idp->syscall_fd_map), 0); + } } static inline unsigned long *check_init_stack(struct mm_id * mm_idp, @@ -80,27 +90,32 @@ static inline long do_syscall_stub(struct mm_id *mm_idp) int n, i; int err, pid = mm_idp->pid; - n = ptrace_setregs(pid, syscall_regs); - if (n < 0) { - printk(UM_KERN_ERR "Registers - \n"); - for (i = 0; i < MAX_REG_NR; i++) - printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); - panic("%s : PTRACE_SETREGS failed, errno = %d\n", - __func__, -n); - } - /* Inform process how much we have filled in. */ proc_data->syscall_data_len = mm_idp->syscall_data_len; - err = ptrace(PTRACE_CONT, pid, 0, 0); - if (err) - panic("Failed to continue stub, pid = %d, errno = %d\n", pid, - errno); - - wait_stub_done(pid); + if (using_seccomp) { + proc_data->restart_wait = 1; + wait_stub_done_seccomp(mm_idp, 0, 1); + } else { + n = ptrace_setregs(pid, syscall_regs); + if (n < 0) { + printk(UM_KERN_ERR "Registers -\n"); + for (i = 0; i < MAX_REG_NR; i++) + printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); + panic("%s : PTRACE_SETREGS failed, errno = %d\n", + __func__, -n); + } + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) + panic("Failed to continue stub, pid = %d, errno = %d\n", + pid, errno); + + wait_stub_done(pid); + } /* - * proc_data->err will be non-zero if there was an (unexpected) error. + * proc_data->err will be negative if there was an (unexpected) error. * In that case, syscall_data_len points to the last executed syscall, * otherwise it will be zero (but we do not need to rely on that). */ @@ -113,6 +128,9 @@ static inline long do_syscall_stub(struct mm_id *mm_idp) mm_idp->syscall_data_len = 0; } + if (using_seccomp) + mm_idp->syscall_fd_num = 0; + return mm_idp->syscall_data_len; } @@ -175,6 +193,44 @@ static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp, return NULL; } +static int get_stub_fd(struct mm_id *mm_idp, int fd) +{ + int i; + + /* Find an FD slot (or flush and use first) */ + if (!using_seccomp) + return fd; + + /* Already crashed, value does not matter */ + if (mm_idp->syscall_data_len < 0) + return 0; + + /* Find existing FD in map if we can allocate another syscall */ + if (mm_idp->syscall_data_len < + ARRAY_SIZE(((struct stub_data *)NULL)->syscall_data)) { + for (i = 0; i < mm_idp->syscall_fd_num; i++) { + if (mm_idp->syscall_fd_map[i] == fd) + return i; + } + + if (mm_idp->syscall_fd_num < STUB_MAX_FDS) { + i = mm_idp->syscall_fd_num; + mm_idp->syscall_fd_map[i] = fd; + + mm_idp->syscall_fd_num++; + + return i; + } + } + + /* FD map full or no syscall space available, continue after flush */ + do_syscall_stub(mm_idp); + mm_idp->syscall_fd_map[0] = fd; + mm_idp->syscall_fd_num = 1; + + return 0; +} + int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, int phys_fd, unsigned long long offset) { @@ -182,12 +238,21 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, /* Compress with previous syscall if that is possible */ sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt); - if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd && + if (sc && sc->mem.prot == prot && sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) { - sc->mem.length += len; - return 0; + int prev_fd = sc->mem.fd; + + if (using_seccomp) + prev_fd = mm_idp->syscall_fd_map[sc->mem.fd]; + + if (phys_fd == prev_fd) { + sc->mem.length += len; + return 0; + } } + phys_fd = get_stub_fd(mm_idp, phys_fd); + sc = syscall_stub_alloc(mm_idp); sc->syscall = STUB_SYSCALL_MMAP; sc->mem.addr = virt; diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index f683cfc9e51a..e42ffac23e3c 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ @@ -15,6 +16,7 @@ #include <sys/mman.h> #include <sys/wait.h> #include <sys/stat.h> +#include <sys/socket.h> #include <asm/unistd.h> #include <as-layout.h> #include <init.h> @@ -25,8 +27,11 @@ #include <registers.h> #include <skas.h> #include <sysdep/stub.h> +#include <sysdep/mcontext.h> +#include <linux/futex.h> #include <linux/threads.h> #include <timetravel.h> +#include <asm-generic/rwonce.h> #include "../internal.h" int is_skas_winch(int pid, int fd, void *data) @@ -142,6 +147,105 @@ bad_wait: fatal_sigsegv(); } +void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys) +{ + struct stub_data *data = (void *)mm_idp->stack; + int ret; + + do { + const char byte = 0; + struct iovec iov = { + .iov_base = (void *)&byte, + .iov_len = sizeof(byte), + }; + union { + char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))]; + struct cmsghdr align; + } ctrl; + struct msghdr msgh = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + if (!running) { + if (mm_idp->syscall_fd_num) { + unsigned int fds_size = + sizeof(int) * mm_idp->syscall_fd_num; + struct cmsghdr *cmsg; + + msgh.msg_control = ctrl.data; + msgh.msg_controllen = CMSG_SPACE(fds_size); + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(fds_size); + memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map, + fds_size); + + CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock, + &msgh, 0)); + } + + data->signal = 0; + data->futex = FUTEX_IN_CHILD; + CATCH_EINTR(syscall(__NR_futex, &data->futex, + FUTEX_WAKE, 1, NULL, NULL, 0)); + } + + do { + /* + * We need to check whether the child is still alive + * before and after the FUTEX_WAIT call. Before, in + * case it just died but we still updated data->futex + * to FUTEX_IN_CHILD. And after, in case it died while + * we were waiting (and SIGCHLD woke us up, see the + * IRQ handler in mmu.c). + * + * Either way, if PID is negative, then we have no + * choice but to kill the task. + */ + if (__READ_ONCE(mm_idp->pid) < 0) + goto out_kill; + + ret = syscall(__NR_futex, &data->futex, + FUTEX_WAIT, FUTEX_IN_CHILD, + NULL, NULL, 0); + if (ret < 0 && errno != EINTR && errno != EAGAIN) { + printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + } while (data->futex == FUTEX_IN_CHILD); + + if (__READ_ONCE(mm_idp->pid) < 0) + goto out_kill; + + running = 0; + + /* We may receive a SIGALRM before SIGSYS, iterate again. */ + } while (wait_sigsys && data->signal == SIGALRM); + + if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) { + printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__); + goto out_kill; + } + + if (wait_sigsys && data->signal != SIGSYS) { + printk(UM_KERN_ERR "%s : expected SIGSYS but got %d", + __func__, data->signal); + goto out_kill; + } + + return; + +out_kill: + printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n", + __func__, mm_idp->pid, errno); + /* This is not true inside start_userspace */ + if (current_mm_id() == mm_idp) + fatal_sigsegv(); +} + extern unsigned long current_stub_stack(void); static void get_skas_faultinfo(int pid, struct faultinfo *fi) @@ -163,12 +267,6 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi) memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); } -static void handle_segv(int pid, struct uml_pt_regs *regs) -{ - get_skas_faultinfo(pid, ®s->faultinfo); - segv(regs->faultinfo, 0, 1, NULL); -} - static void handle_trap(int pid, struct uml_pt_regs *regs) { if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) @@ -181,50 +279,80 @@ extern char __syscall_stub_start[]; static int stub_exe_fd; -static int userspace_tramp(void *stack) +struct tramp_data { + struct stub_data *stub_data; + /* 0 is inherited, 1 is the kernel side */ + int sockpair[2]; +}; + +#ifndef CLOSE_RANGE_CLOEXEC +#define CLOSE_RANGE_CLOEXEC (1U << 2) +#endif + +static int userspace_tramp(void *data) { + struct tramp_data *tramp_data = data; char *const argv[] = { "uml-userspace", NULL }; - int pipe_fds[2]; unsigned long long offset; struct stub_init_data init_data = { + .seccomp = using_seccomp, .stub_start = STUB_START, - .segv_handler = STUB_CODE + - (unsigned long) stub_segv_handler - - (unsigned long) __syscall_stub_start, }; struct iomem_region *iomem; int ret; + if (using_seccomp) { + init_data.signal_handler = STUB_CODE + + (unsigned long) stub_signal_interrupt - + (unsigned long) __syscall_stub_start; + init_data.signal_restorer = STUB_CODE + + (unsigned long) stub_signal_restorer - + (unsigned long) __syscall_stub_start; + } else { + init_data.signal_handler = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) __syscall_stub_start; + init_data.signal_restorer = 0; + } + init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); init_data.stub_code_offset = MMAP_OFFSET(offset); - init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); + init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data), + &offset); init_data.stub_data_offset = MMAP_OFFSET(offset); - /* Set CLOEXEC on all FDs and then unset on all memory related FDs */ - close_range(0, ~0U, CLOSE_RANGE_CLOEXEC); + /* + * Avoid leaking unneeded FDs to the stub by setting CLOEXEC on all FDs + * and then unsetting it on all memory related FDs. + * This is not strictly necessary from a safety perspective. + */ + syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC); fcntl(init_data.stub_data_fd, F_SETFD, 0); - for (iomem = iomem_regions; iomem; iomem = iomem->next) - fcntl(iomem->fd, F_SETFD, 0); - /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */ - if (pipe(pipe_fds)) - exit(2); + /* In SECCOMP mode, these FDs are passed when needed */ + if (!using_seccomp) { + for (iomem = iomem_regions; iomem; iomem = iomem->next) + fcntl(iomem->fd, F_SETFD, 0); + } - if (dup2(pipe_fds[0], 0) < 0) + /* dup2 signaling FD/socket to STDIN */ + if (dup2(tramp_data->sockpair[0], 0) < 0) exit(3); - close(pipe_fds[0]); + close(tramp_data->sockpair[0]); /* Write init_data and close write side */ - ret = write(pipe_fds[1], &init_data, sizeof(init_data)); - close(pipe_fds[1]); + ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data)); + close(tramp_data->sockpair[1]); if (ret != sizeof(init_data)) exit(4); - execveat(stub_exe_fd, "", argv, NULL, AT_EMPTY_PATH); + /* Raw execveat for compatibility with older libc versions */ + syscall(__NR_execveat, stub_exe_fd, (unsigned long)"", + (unsigned long)argv, NULL, AT_EMPTY_PATH); exit(5); } @@ -305,11 +433,12 @@ static int __init init_stub_exe_fd(void) } __initcall(init_stub_exe_fd); +int using_seccomp; int userspace_pid[NR_CPUS]; /** * start_userspace() - prepare a new userspace process - * @stub_stack: pointer to the stub stack. + * @mm_id: The corresponding struct mm_id * * Setups a new temporary stack page that is used while userspace_tramp() runs * Clones the kernel process into a new userspace process, with FDs only. @@ -318,11 +447,15 @@ int userspace_pid[NR_CPUS]; * when negative: an error number. * FIXME: can PIDs become negative?! */ -int start_userspace(unsigned long stub_stack) +int start_userspace(struct mm_id *mm_id) { + struct stub_data *proc_data = (void *)mm_id->stack; + struct tramp_data tramp_data = { + .stub_data = proc_data, + }; void *stack; unsigned long sp; - int pid, status, n, err; + int status, n, err; /* setup a temporary stack page */ stack = mmap(NULL, UM_KERN_PAGE_SIZE, @@ -338,40 +471,55 @@ int start_userspace(unsigned long stub_stack) /* set stack pointer to the end of the stack page, so it can grow downwards */ sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; - /* clone into new userspace process */ - pid = clone(userspace_tramp, (void *) sp, + /* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */ + if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) { + err = -errno; + printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n", + __func__, errno); + return err; + } + + if (using_seccomp) + proc_data->futex = FUTEX_IN_CHILD; + + mm_id->pid = clone(userspace_tramp, (void *) sp, CLONE_VFORK | CLONE_VM | SIGCHLD, - (void *)stub_stack); - if (pid < 0) { + (void *)&tramp_data); + if (mm_id->pid < 0) { err = -errno; printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", __func__, errno); - return err; + goto out_close; } - do { - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); - if (n < 0) { + if (using_seccomp) { + wait_stub_done_seccomp(mm_id, 1, 1); + } else { + do { + CATCH_EINTR(n = waitpid(mm_id->pid, &status, + WUNTRACED | __WALL)); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); + + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { + err = -EINVAL; + printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", + __func__, status); + goto out_kill; + } + + if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL, + (void *) PTRACE_O_TRACESYSGOOD) < 0) { err = -errno; - printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", + printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", __func__, errno); goto out_kill; } - } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); - - if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { - err = -EINVAL; - printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", - __func__, status); - goto out_kill; - } - - if (ptrace(PTRACE_SETOPTIONS, pid, NULL, - (void *) PTRACE_O_TRACESYSGOOD) < 0) { - err = -errno; - printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", - __func__, errno); - goto out_kill; } if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { @@ -381,10 +529,22 @@ int start_userspace(unsigned long stub_stack) goto out_kill; } - return pid; + close(tramp_data.sockpair[0]); + if (using_seccomp) + mm_id->sock = tramp_data.sockpair[1]; + else + close(tramp_data.sockpair[1]); + + return 0; + +out_kill: + os_kill_ptraced_process(mm_id->pid, 1); +out_close: + close(tramp_data.sockpair[0]); + close(tramp_data.sockpair[1]); + + mm_id->pid = -1; - out_kill: - os_kill_ptraced_process(pid, 1); return err; } @@ -394,7 +554,9 @@ extern unsigned long tt_extra_sched_jiffies; void userspace(struct uml_pt_regs *regs) { int err, status, op, pid = userspace_pid[0]; - siginfo_t si; + siginfo_t si_ptrace; + siginfo_t *si; + int sig; /* Handle any immediate reschedules or signals */ interrupt_end(); @@ -427,103 +589,177 @@ void userspace(struct uml_pt_regs *regs) current_mm_sync(); - /* Flush out any pending syscalls */ - err = syscall_stub_flush(current_mm_id()); - if (err) { - if (err == -ENOMEM) - report_enomem(); + if (using_seccomp) { + struct mm_id *mm_id = current_mm_id(); + struct stub_data *proc_data = (void *) mm_id->stack; + int ret; - printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", - __func__, -err); - fatal_sigsegv(); - } + ret = set_stub_state(regs, proc_data, singlestepping()); + if (ret) { + printk(UM_KERN_ERR "%s - failed to set regs: %d", + __func__, ret); + fatal_sigsegv(); + } - /* - * This can legitimately fail if the process loads a - * bogus value into a segment register. It will - * segfault and PTRACE_GETREGS will read that value - * out of the process. However, PTRACE_SETREGS will - * fail. In this case, there is nothing to do but - * just kill the process. - */ - if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { - printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + /* Must have been reset by the syscall caller */ + if (proc_data->restart_wait != 0) + panic("Programming error: Flag to only run syscalls in child was not cleared!"); - if (put_fp_registers(pid, regs->fp)) { - printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + /* Mark pending syscalls for flushing */ + proc_data->syscall_data_len = mm_id->syscall_data_len; - if (singlestepping()) - op = PTRACE_SYSEMU_SINGLESTEP; - else - op = PTRACE_SYSEMU; + wait_stub_done_seccomp(mm_id, 0, 0); - if (ptrace(op, pid, 0, 0)) { - printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", - __func__, op, errno); - fatal_sigsegv(); - } + sig = proc_data->signal; - CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); - if (err < 0) { - printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + if (sig == SIGTRAP && proc_data->err != 0) { + printk(UM_KERN_ERR "%s - Error flushing stub syscalls", + __func__); + syscall_stub_dump_error(mm_id); + mm_id->syscall_data_len = proc_data->err; + fatal_sigsegv(); + } - regs->is_user = 1; - if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { - printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + mm_id->syscall_data_len = 0; + mm_id->syscall_fd_num = 0; - if (get_fp_registers(pid, regs->fp)) { - printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + ret = get_stub_state(regs, proc_data, NULL); + if (ret) { + printk(UM_KERN_ERR "%s - failed to get regs: %d", + __func__, ret); + fatal_sigsegv(); + } - UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) + panic("%s - Invalid siginfo offset from child", + __func__); + si = (void *)&proc_data->sigstack[proc_data->si_offset]; - if (WIFSTOPPED(status)) { - int sig = WSTOPSIG(status); + regs->is_user = 1; - /* These signal handlers need the si argument. - * The SIGIO and SIGALARM handlers which constitute the - * majority of invocations, do not use it. + /* Fill in ORIG_RAX and extract fault information */ + PT_SYSCALL_NR(regs->gp) = si->si_syscall; + if (sig == SIGSEGV) { + mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset]; + + GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext); + } + } else { + /* Flush out any pending syscalls */ + err = syscall_stub_flush(current_mm_id()); + if (err) { + if (err == -ENOMEM) + report_enomem(); + + printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", + __func__, -err); + fatal_sigsegv(); + } + + /* + * This can legitimately fail if the process loads a + * bogus value into a segment register. It will + * segfault and PTRACE_GETREGS will read that value + * out of the process. However, PTRACE_SETREGS will + * fail. In this case, there is nothing to do but + * just kill the process. */ - switch (sig) { - case SIGSEGV: - case SIGTRAP: - case SIGILL: - case SIGBUS: - case SIGFPE: - case SIGWINCH: - ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); - break; + if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); } - switch (sig) { - case SIGSEGV: - if (PTRACE_FULL_FAULTINFO) { + if (put_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (singlestepping()) + op = PTRACE_SYSEMU_SINGLESTEP; + else + op = PTRACE_SYSEMU; + + if (ptrace(op, pid, 0, 0)) { + printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", + __func__, op, errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if (err < 0) { + printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + regs->is_user = 1; + if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (get_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (WIFSTOPPED(status)) { + sig = WSTOPSIG(status); + + /* + * These signal handlers need the si argument + * and SIGSEGV needs the faultinfo. + * The SIGIO and SIGALARM handlers which constitute + * the majority of invocations, do not use it. + */ + switch (sig) { + case SIGSEGV: get_skas_faultinfo(pid, ®s->faultinfo); - (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, - regs); + fallthrough; + case SIGTRAP: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + ptrace(PTRACE_GETSIGINFO, pid, 0, + (struct siginfo *)&si_ptrace); + si = &si_ptrace; + break; + default: + si = NULL; + break; } - else handle_segv(pid, regs); + } else { + sig = 0; + } + } + + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + + if (sig) { + switch (sig) { + case SIGSEGV: + if (using_seccomp || PTRACE_FULL_FAULTINFO) + (*sig_info[SIGSEGV])(SIGSEGV, + (struct siginfo *)si, + regs, NULL); + else + segv(regs->faultinfo, 0, 1, NULL, NULL); + + break; + case SIGSYS: + handle_syscall(regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs); break; case SIGTRAP: - relay_signal(SIGTRAP, (struct siginfo *)&si, regs); + relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL); break; case SIGALRM: break; @@ -533,7 +769,7 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: block_signals_trace(); - (*sig_info[sig])(sig, (struct siginfo *)&si, regs); + (*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL); unblock_signals_trace(); break; default: diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 93fc82c01aba..a827c2e01aa5 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ @@ -24,6 +25,13 @@ #include <kern_util.h> #include <mem_user.h> #include <ptrace_user.h> +#include <stdbool.h> +#include <stub-data.h> +#include <sys/prctl.h> +#include <linux/seccomp.h> +#include <linux/filter.h> +#include <sysdep/mcontext.h> +#include <sysdep/stub.h> #include <registers.h> #include <skas.h> #include "internal.h" @@ -224,6 +232,140 @@ static void __init check_ptrace(void) check_sysemu(); } +extern unsigned long host_fp_size; +extern unsigned long exec_regs[MAX_REG_NR]; +extern unsigned long *exec_fp_regs; + +__initdata static struct stub_data *seccomp_test_stub_data; + +static void __init sigsys_handler(int sig, siginfo_t *info, void *p) +{ + ucontext_t *uc = p; + + /* Stow away the location of the mcontext in the stack */ + seccomp_test_stub_data->mctx_offset = (unsigned long)&uc->uc_mcontext - + (unsigned long)&seccomp_test_stub_data->sigstack[0]; + + /* Prevent libc from clearing memory (mctx_offset in particular) */ + syscall(__NR_exit, 0); +} + +static int __init seccomp_helper(void *data) +{ + static struct sock_filter filter[] = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_nanosleep, 1, 0), + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), + }; + static struct sock_fprog prog = { + .len = ARRAY_SIZE(filter), + .filter = filter, + }; + struct sigaction sa; + + /* close_range is needed for the stub */ + if (stub_syscall3(__NR_close_range, 1, ~0U, 0)) + exit(1); + + set_sigstack(seccomp_test_stub_data->sigstack, + sizeof(seccomp_test_stub_data->sigstack)); + + sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; + sa.sa_sigaction = (void *) sigsys_handler; + sa.sa_restorer = NULL; + if (sigaction(SIGSYS, &sa, NULL) < 0) + exit(2); + + prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0) + exit(3); + + sleep(0); + + /* Never reached. */ + _exit(4); +} + +static bool __init init_seccomp(void) +{ + int pid; + int status; + int n; + unsigned long sp; + + /* + * We check that we can install a seccomp filter and then exit(0) + * from a trapped syscall. + * + * Note that we cannot verify that no seccomp filter already exists + * for a syscall that results in the process/thread to be killed. + */ + + os_info("Checking that seccomp filters can be installed..."); + + seccomp_test_stub_data = mmap(0, sizeof(*seccomp_test_stub_data), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON, 0, 0); + + /* Use the syscall data area as stack, we just need something */ + sp = (unsigned long)&seccomp_test_stub_data->syscall_data + + sizeof(seccomp_test_stub_data->syscall_data) - + sizeof(void *); + pid = clone(seccomp_helper, (void *)sp, CLONE_VFORK | CLONE_VM, NULL); + + if (pid < 0) + fatal_perror("check_seccomp : clone failed"); + + CATCH_EINTR(n = waitpid(pid, &status, __WCLONE)); + if (n < 0) + fatal_perror("check_seccomp : waitpid failed"); + + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + struct uml_pt_regs *regs; + unsigned long fp_size; + int r; + + /* Fill in the host_fp_size from the mcontext. */ + regs = calloc(1, sizeof(struct uml_pt_regs)); + get_stub_state(regs, seccomp_test_stub_data, &fp_size); + host_fp_size = fp_size; + free(regs); + + /* Repeat with the correct size */ + regs = calloc(1, sizeof(struct uml_pt_regs) + host_fp_size); + r = get_stub_state(regs, seccomp_test_stub_data, NULL); + + /* Store as the default startup registers */ + exec_fp_regs = malloc(host_fp_size); + memcpy(exec_regs, regs->gp, sizeof(exec_regs)); + memcpy(exec_fp_regs, regs->fp, host_fp_size); + + munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data)); + + free(regs); + + if (r) { + os_info("failed to fetch registers: %d\n", r); + return false; + } + + os_info("OK\n"); + return true; + } + + if (WIFEXITED(status) && WEXITSTATUS(status) == 2) + os_info("missing\n"); + else + os_info("error\n"); + + munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data)); + return false; +} + + static void __init check_coredump_limit(void) { struct rlimit lim; @@ -278,6 +420,44 @@ void __init get_host_cpu_features( } } +static int seccomp_config __initdata; + +static int __init uml_seccomp_config(char *line, int *add) +{ + *add = 0; + + if (strcmp(line, "off") == 0) + seccomp_config = 0; + else if (strcmp(line, "auto") == 0) + seccomp_config = 1; + else if (strcmp(line, "on") == 0) + seccomp_config = 2; + else + fatal("Invalid seccomp option '%s', expected on/auto/off\n", + line); + + return 0; +} + +__uml_setup("seccomp=", uml_seccomp_config, +"seccomp=<on/auto/off>\n" +" Configure whether or not SECCOMP is used. With SECCOMP, userspace\n" +" processes work collaboratively with the kernel instead of being\n" +" traced using ptrace. All syscalls from the application are caught and\n" +" redirected using a signal. This signal handler in turn is permitted to\n" +" do the selected set of syscalls to communicate with the UML kernel and\n" +" do the required memory management.\n" +"\n" +" This method is overall faster than the ptrace based userspace, primarily\n" +" because it reduces the number of context switches for (minor) page faults.\n" +"\n" +" However, the SECCOMP filter is not (yet) restrictive enough to prevent\n" +" userspace from reading and writing all physical memory. Userspace\n" +" processes could also trick the stub into disabling SIGALRM which\n" +" prevents it from being interrupted for scheduling purposes.\n" +"\n" +" This is insecure and should only be used with a trusted userspace\n\n" +); void __init os_early_checks(void) { @@ -286,13 +466,24 @@ void __init os_early_checks(void) /* Print out the core dump limits early */ check_coredump_limit(); - check_ptrace(); - /* Need to check this early because mmapping happens before the * kernel is running. */ check_tmpexec(); + if (seccomp_config) { + if (init_seccomp()) { + using_seccomp = 1; + return; + } + + if (seccomp_config == 2) + fatal("SECCOMP userspace requested but not functional!\n"); + } + + using_seccomp = 0; + check_ptrace(); + pid = start_ptraced_child(); if (init_pid_registers(pid)) fatal("Failed to initialize default registers"); |