summaryrefslogtreecommitdiff
path: root/arch/um
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-18 04:56:44 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-18 04:56:44 +0300
commit345b17acb1aa7a443741d9220f66b30d5ddd7c39 (patch)
tree8f75bc2b08540b47fc8c66a262f8add6bbad70eb /arch/um
parent787fec8ac15cc693b9a7bc1b4a338b92483d993c (diff)
parent1fb1abc83636f5329c26cd29f0f19f3faeb697a5 (diff)
downloadlinux-345b17acb1aa7a443741d9220f66b30d5ddd7c39.tar.xz
Merge tag 'for-linus-5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
Pull UML updates from Richard Weinberger: - IRQ handling cleanups - Support for suspend - Various fixes for UML specific drivers: ubd, vector, xterm * tag 'for-linus-5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml: (32 commits) um: Fix build w/o CONFIG_PM_SLEEP um: time-travel: Correct time event IRQ delivery um: irq/sigio: Support suspend/resume handling of workaround IRQs um: time-travel: Actually apply "free-until" optimisation um: chan_xterm: Fix fd leak um: tty: Fix handling of close in tty lines um: Monitor error events in IRQ controller um: allocate a guard page to helper threads um: support some of ARCH_HAS_SET_MEMORY um: time-travel: avoid multiple identical propagations um: Fetch registers only for signals which need them um: Support suspend to RAM um: Allow PM with suspend-to-idle um: time: Fix read_persistent_clock64() in time-travel um: Simplify os_idle_sleep() and sleep longer um: Simplify IRQ handling code um: Remove IRQ_NONE type um: irq: Reduce irq_reg allocation um: irq: Clean up and rename struct irq_fd um: Clean up alarm IRQ chip name ...
Diffstat (limited to 'arch/um')
-rw-r--r--arch/um/Kconfig6
-rw-r--r--arch/um/drivers/chan_user.c4
-rw-r--r--arch/um/drivers/line.c30
-rw-r--r--arch/um/drivers/mconsole_kern.c2
-rw-r--r--arch/um/drivers/net_kern.c2
-rw-r--r--arch/um/drivers/port_kern.c4
-rw-r--r--arch/um/drivers/random.c105
-rw-r--r--arch/um/drivers/ubd_kern.c273
-rw-r--r--arch/um/drivers/vector_kern.c10
-rw-r--r--arch/um/drivers/virtio_uml.c31
-rw-r--r--arch/um/drivers/xterm.c5
-rw-r--r--arch/um/drivers/xterm_kern.c2
-rw-r--r--arch/um/include/asm/irq.h9
-rw-r--r--arch/um/include/asm/pgtable.h3
-rw-r--r--arch/um/include/asm/set_memory.h1
-rw-r--r--arch/um/include/linux/time-internal.h4
-rw-r--r--arch/um/include/shared/common-offsets.h3
-rw-r--r--arch/um/include/shared/irq_kern.h13
-rw-r--r--arch/um/include/shared/irq_user.h21
-rw-r--r--arch/um/include/shared/kern_util.h5
-rw-r--r--arch/um/include/shared/os.h23
-rw-r--r--arch/um/kernel/irq.c534
-rw-r--r--arch/um/kernel/process.c24
-rw-r--r--arch/um/kernel/sigio.c2
-rw-r--r--arch/um/kernel/time.c97
-rw-r--r--arch/um/kernel/tlb.c54
-rw-r--r--arch/um/kernel/um_arch.c67
-rw-r--r--arch/um/os-Linux/Makefile2
-rw-r--r--arch/um/os-Linux/helper.c4
-rw-r--r--arch/um/os-Linux/irq.c4
-rw-r--r--arch/um/os-Linux/sigio.c70
-rw-r--r--arch/um/os-Linux/signal.c22
-rw-r--r--arch/um/os-Linux/skas/process.c15
-rw-r--r--arch/um/os-Linux/time.c17
-rw-r--r--arch/um/os-Linux/umid.c17
35 files changed, 897 insertions, 588 deletions
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 43333e36e0ba..34d302d1a07f 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -15,6 +15,7 @@ config UML
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_BUGVERBOSE
select NO_DMA
+ select ARCH_HAS_SET_MEMORY
select GENERIC_IRQ_SHOW
select GENERIC_CPU_DEVICES
select HAVE_GCC_PLUGINS
@@ -191,3 +192,8 @@ config UML_TIME_TRAVEL_SUPPORT
endmenu
source "arch/um/drivers/Kconfig"
+
+config ARCH_SUSPEND_POSSIBLE
+ def_bool y
+
+source "kernel/power/Kconfig"
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index 4d80526a4236..d8845d4aac6a 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -26,10 +26,10 @@ int generic_read(int fd, char *c_out, void *unused)
n = read(fd, c_out, sizeof(*c_out));
if (n > 0)
return n;
- else if (errno == EAGAIN)
- return 0;
else if (n == 0)
return -EIO;
+ else if (errno == EAGAIN)
+ return 0;
return -errno;
}
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 14ad9f495fe6..1c70a31e7c5b 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -262,19 +262,25 @@ static irqreturn_t line_write_interrupt(int irq, void *data)
int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
{
const struct line_driver *driver = line->driver;
- int err = 0;
+ int err;
- if (input)
+ if (input) {
err = um_request_irq(driver->read_irq, fd, IRQ_READ,
line_interrupt, IRQF_SHARED,
driver->read_irq_name, data);
- if (err)
- return err;
- if (output)
+ if (err < 0)
+ return err;
+ }
+
+ if (output) {
err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
line_write_interrupt, IRQF_SHARED,
driver->write_irq_name, data);
- return err;
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
}
static int line_activate(struct tty_port *port, struct tty_struct *tty)
@@ -608,7 +614,6 @@ static void free_winch(struct winch *winch)
winch->fd = -1;
if (fd != -1)
os_close_file(fd);
- list_del(&winch->list);
__free_winch(&winch->work);
}
@@ -709,6 +714,8 @@ static void unregister_winch(struct tty_struct *tty)
winch = list_entry(ele, struct winch, list);
wtty = tty_port_tty_get(winch->port);
if (wtty == tty) {
+ list_del(&winch->list);
+ spin_unlock(&winch_handler_lock);
free_winch(winch);
break;
}
@@ -719,14 +726,17 @@ static void unregister_winch(struct tty_struct *tty)
static void winch_cleanup(void)
{
- struct list_head *ele, *next;
struct winch *winch;
spin_lock(&winch_handler_lock);
+ while ((winch = list_first_entry_or_null(&winch_handlers,
+ struct winch, list))) {
+ list_del(&winch->list);
+ spin_unlock(&winch_handler_lock);
- list_for_each_safe(ele, next, &winch_handlers) {
- winch = list_entry(ele, struct winch, list);
free_winch(winch);
+
+ spin_lock(&winch_handler_lock);
}
spin_unlock(&winch_handler_lock);
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index a2e680f7d39f..6d00af25ec6b 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -738,7 +738,7 @@ static int __init mconsole_init(void)
err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt,
IRQF_SHARED, "mconsole", (void *)sock);
- if (err) {
+ if (err < 0) {
printk(KERN_ERR "Failed to get IRQ for management console\n");
goto out;
}
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 1802cf4ef5a5..2fc0b038ff8a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -160,7 +160,7 @@ static int uml_net_open(struct net_device *dev)
err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt,
IRQF_SHARED, dev->name, dev);
- if (err != 0) {
+ if (err < 0) {
printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err);
err = -ENETUNREACH;
goto out_close;
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index a47ca5376d9d..efa8b7304090 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -100,7 +100,7 @@ static int port_accept(struct port_list *port)
.port = port });
if (um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt,
- IRQF_SHARED, "telnetd", conn)) {
+ IRQF_SHARED, "telnetd", conn) < 0) {
printk(KERN_ERR "port_accept : failed to get IRQ for "
"telnetd\n");
goto out_free;
@@ -182,7 +182,7 @@ void *port_data(int port_num)
}
if (um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt,
- IRQF_SHARED, "port", port)) {
+ IRQF_SHARED, "port", port) < 0) {
printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num);
goto out_close;
}
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index ce115fce52f0..433a3f8f2ef3 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -11,6 +11,7 @@
#include <linux/fs.h>
#include <linux/interrupt.h>
#include <linux/miscdevice.h>
+#include <linux/hw_random.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <init.h>
@@ -18,9 +19,8 @@
#include <os.h>
/*
- * core module and version information
+ * core module information
*/
-#define RNG_VERSION "1.0.0"
#define RNG_MODULE_NAME "hw_random"
/* Changed at init time, in the non-modular case, and at module load
@@ -28,88 +28,36 @@
* protects against a module being loaded twice at the same time.
*/
static int random_fd = -1;
-static DECLARE_WAIT_QUEUE_HEAD(host_read_wait);
+static struct hwrng hwrng = { 0, };
+static DECLARE_COMPLETION(have_data);
-static int rng_dev_open (struct inode *inode, struct file *filp)
+static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block)
{
- /* enforce read-only access to this chrdev */
- if ((filp->f_mode & FMODE_READ) == 0)
- return -EINVAL;
- if ((filp->f_mode & FMODE_WRITE) != 0)
- return -EINVAL;
+ int ret;
- return 0;
-}
-
-static atomic_t host_sleep_count = ATOMIC_INIT(0);
-
-static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
- loff_t *offp)
-{
- u32 data;
- int n, ret = 0, have_data;
-
- while (size) {
- n = os_read_file(random_fd, &data, sizeof(data));
- if (n > 0) {
- have_data = n;
- while (have_data && size) {
- if (put_user((u8) data, buf++)) {
- ret = ret ? : -EFAULT;
- break;
- }
- size--;
- ret++;
- have_data--;
- data >>= 8;
- }
- }
- else if (n == -EAGAIN) {
- DECLARE_WAITQUEUE(wait, current);
-
- if (filp->f_flags & O_NONBLOCK)
- return ret ? : -EAGAIN;
-
- atomic_inc(&host_sleep_count);
+ for (;;) {
+ ret = os_read_file(random_fd, buf, max);
+ if (block && ret == -EAGAIN) {
add_sigio_fd(random_fd);
- add_wait_queue(&host_read_wait, &wait);
- set_current_state(TASK_INTERRUPTIBLE);
+ ret = wait_for_completion_killable(&have_data);
- schedule();
- remove_wait_queue(&host_read_wait, &wait);
+ ignore_sigio_fd(random_fd);
+ deactivate_fd(random_fd, RANDOM_IRQ);
- if (atomic_dec_and_test(&host_sleep_count)) {
- ignore_sigio_fd(random_fd);
- deactivate_fd(random_fd, RANDOM_IRQ);
- }
+ if (ret < 0)
+ break;
+ } else {
+ break;
}
- else
- return n;
-
- if (signal_pending (current))
- return ret ? : -ERESTARTSYS;
}
- return ret;
-}
-
-static const struct file_operations rng_chrdev_ops = {
- .owner = THIS_MODULE,
- .open = rng_dev_open,
- .read = rng_dev_read,
- .llseek = noop_llseek,
-};
-/* rng_init shouldn't be called more than once at boot time */
-static struct miscdevice rng_miscdev = {
- HWRNG_MINOR,
- RNG_MODULE_NAME,
- &rng_chrdev_ops,
-};
+ return ret != -EAGAIN ? ret : 0;
+}
static irqreturn_t random_interrupt(int irq, void *data)
{
- wake_up(&host_read_wait);
+ complete(&have_data);
return IRQ_HANDLED;
}
@@ -126,18 +74,19 @@ static int __init rng_init (void)
goto out;
random_fd = err;
-
err = um_request_irq(RANDOM_IRQ, random_fd, IRQ_READ, random_interrupt,
0, "random", NULL);
- if (err)
+ if (err < 0)
goto err_out_cleanup_hw;
- sigio_broken(random_fd, 1);
+ sigio_broken(random_fd);
+ hwrng.name = RNG_MODULE_NAME;
+ hwrng.read = rng_dev_read;
+ hwrng.quality = 1024;
- err = misc_register (&rng_miscdev);
+ err = hwrng_register(&hwrng);
if (err) {
- printk (KERN_ERR RNG_MODULE_NAME ": misc device register "
- "failed\n");
+ pr_err(RNG_MODULE_NAME " registering failed (%d)\n", err);
goto err_out_cleanup_hw;
}
out:
@@ -161,8 +110,8 @@ static void cleanup(void)
static void __exit rng_cleanup(void)
{
+ hwrng_unregister(&hwrng);
os_close_file(random_fd);
- misc_deregister (&rng_miscdev);
}
module_init (rng_init);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index eae8c83364f7..13b1fe694b90 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -47,18 +47,25 @@
/* Max request size is determined by sector mask - 32K */
#define UBD_MAX_REQUEST (8 * sizeof(long))
+struct io_desc {
+ char *buffer;
+ unsigned long length;
+ unsigned long sector_mask;
+ unsigned long long cow_offset;
+ unsigned long bitmap_words[2];
+};
+
struct io_thread_req {
struct request *req;
int fds[2];
unsigned long offsets[2];
unsigned long long offset;
- unsigned long length;
- char *buffer;
int sectorsize;
- unsigned long sector_mask;
- unsigned long long cow_offset;
- unsigned long bitmap_words[2];
int error;
+
+ int desc_cnt;
+ /* io_desc has to be the last element of the struct */
+ struct io_desc io_desc[];
};
@@ -148,6 +155,7 @@ struct ubd {
/* name (and fd, below) of the file opened for writing, either the
* backing or the cow file. */
char *file;
+ char *serial;
int count;
int fd;
__u64 size;
@@ -173,6 +181,7 @@ struct ubd {
#define DEFAULT_UBD { \
.file = NULL, \
+ .serial = NULL, \
.count = 0, \
.fd = -1, \
.size = -1, \
@@ -265,7 +274,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
{
struct ubd *ubd_dev;
struct openflags flags = global_openflags;
- char *backing_file;
+ char *file, *backing_file, *serial;
int n, err = 0, i;
if(index_out) *index_out = -1;
@@ -361,24 +370,27 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
goto out;
break_loop:
- backing_file = strchr(str, ',');
+ file = strsep(&str, ",:");
+ if (*file == '\0')
+ file = NULL;
- if (backing_file == NULL)
- backing_file = strchr(str, ':');
+ backing_file = strsep(&str, ",:");
+ if (*backing_file == '\0')
+ backing_file = NULL;
- if(backing_file != NULL){
- if(ubd_dev->no_cow){
- *error_out = "Can't specify both 'd' and a cow file";
- goto out;
- }
- else {
- *backing_file = '\0';
- backing_file++;
- }
+ serial = strsep(&str, ",:");
+ if (*serial == '\0')
+ serial = NULL;
+
+ if (backing_file && ubd_dev->no_cow) {
+ *error_out = "Can't specify both 'd' and a cow file";
+ goto out;
}
+
err = 0;
- ubd_dev->file = str;
+ ubd_dev->file = file;
ubd_dev->cow.file = backing_file;
+ ubd_dev->serial = serial;
ubd_dev->boot_openflags = flags;
out:
mutex_unlock(&ubd_lock);
@@ -399,7 +411,7 @@ static int ubd_setup(char *str)
__setup("ubd", ubd_setup);
__uml_help(ubd_setup,
-"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
+"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
" This is used to associate a device with a file in the underlying\n"
" filesystem. When specifying two filenames, the first one is the\n"
" COW name and the second is the backing file name. As separator you can\n"
@@ -422,6 +434,12 @@ __uml_help(ubd_setup,
" UMLs and file locking will be turned off - this is appropriate for a\n"
" cluster filesystem and inappropriate at almost all other times.\n\n"
" 't' will disable trim/discard support on the device (enabled by default).\n\n"
+" An optional device serial number can be exposed using the serial parameter\n"
+" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
+" useful when a unique number should be given to the device. Note when\n"
+" specifying a label, the filename2 must be also presented. It can be\n"
+" an empty string, in which case the backing file is not used:\n"
+" ubd0=File,,Serial\n"
);
static int udb_setup(char *str)
@@ -525,12 +543,7 @@ static void ubd_handler(void)
blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
}
- if ((io_req->error) || (io_req->buffer == NULL))
- blk_mq_end_request(io_req->req, io_req->error);
- else {
- if (!blk_update_request(io_req->req, io_req->error, io_req->length))
- __blk_mq_end_request(io_req->req, io_req->error);
- }
+ blk_mq_end_request(io_req->req, io_req->error);
kfree(io_req);
}
}
@@ -866,6 +879,41 @@ static void ubd_device_release(struct device *dev)
*ubd_dev = ((struct ubd) DEFAULT_UBD);
}
+static ssize_t serial_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct ubd *ubd_dev = disk->private_data;
+
+ if (!ubd_dev)
+ return 0;
+
+ return sprintf(buf, "%s", ubd_dev->serial);
+}
+
+static DEVICE_ATTR_RO(serial);
+
+static struct attribute *ubd_attrs[] = {
+ &dev_attr_serial.attr,
+ NULL,
+};
+
+static umode_t ubd_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ return a->mode;
+}
+
+static const struct attribute_group ubd_attr_group = {
+ .attrs = ubd_attrs,
+ .is_visible = ubd_attrs_are_visible,
+};
+
+static const struct attribute_group *ubd_attr_groups[] = {
+ &ubd_attr_group,
+ NULL,
+};
+
static int ubd_disk_register(int major, u64 size, int unit,
struct gendisk **disk_out)
{
@@ -897,7 +945,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
disk->private_data = &ubd_devs[unit];
disk->queue = ubd_devs[unit].queue;
- device_add_disk(parent, disk, NULL);
+ device_add_disk(parent, disk, ubd_attr_groups);
*disk_out = disk;
return 0;
@@ -946,6 +994,7 @@ static int ubd_add(int n, char **error_out)
blk_queue_write_cache(ubd_dev->queue, true, false);
blk_queue_max_segments(ubd_dev->queue, MAX_SG);
+ blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
if(err){
*error_out = "Failed to register device";
@@ -1192,7 +1241,7 @@ static int __init ubd_driver_init(void){
/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
* enough. So use anyway the io thread. */
}
- stack = alloc_stack(0, 0);
+ stack = alloc_stack(0);
io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
&thread_fd);
if(io_pid < 0){
@@ -1204,7 +1253,7 @@ static int __init ubd_driver_init(void){
}
err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
0, "ubd", ubd_devs);
- if(err != 0)
+ if(err < 0)
printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
return 0;
}
@@ -1289,37 +1338,74 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
*cow_offset += bitmap_offset;
}
-static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
+static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
+ unsigned long offset, unsigned long *bitmap,
__u64 bitmap_offset, __u64 bitmap_len)
{
- __u64 sector = req->offset >> SECTOR_SHIFT;
+ __u64 sector = offset >> SECTOR_SHIFT;
int i;
- if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT)
+ if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
panic("Operation too long");
if (req_op(req->req) == REQ_OP_READ) {
- for (i = 0; i < req->length >> SECTOR_SHIFT; i++) {
+ for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
ubd_set_bit(i, (unsigned char *)
- &req->sector_mask);
+ &segment->sector_mask);
}
+ } else {
+ cowify_bitmap(offset, segment->length, &segment->sector_mask,
+ &segment->cow_offset, bitmap, bitmap_offset,
+ segment->bitmap_words, bitmap_len);
}
- else cowify_bitmap(req->offset, req->length, &req->sector_mask,
- &req->cow_offset, bitmap, bitmap_offset,
- req->bitmap_words, bitmap_len);
}
-static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
- u64 off, struct bio_vec *bvec)
+static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
+ struct request *req)
+{
+ struct bio_vec bvec;
+ struct req_iterator iter;
+ int i = 0;
+ unsigned long byte_offset = io_req->offset;
+ int op = req_op(req);
+
+ if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
+ io_req->io_desc[0].buffer = NULL;
+ io_req->io_desc[0].length = blk_rq_bytes(req);
+ } else {
+ rq_for_each_segment(bvec, req, iter) {
+ BUG_ON(i >= io_req->desc_cnt);
+
+ io_req->io_desc[i].buffer =
+ page_address(bvec.bv_page) + bvec.bv_offset;
+ io_req->io_desc[i].length = bvec.bv_len;
+ i++;
+ }
+ }
+
+ if (dev->cow.file) {
+ for (i = 0; i < io_req->desc_cnt; i++) {
+ cowify_req(io_req, &io_req->io_desc[i], byte_offset,
+ dev->cow.bitmap, dev->cow.bitmap_offset,
+ dev->cow.bitmap_len);
+ byte_offset += io_req->io_desc[i].length;
+ }
+
+ }
+}
+
+static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
+ int desc_cnt)
{
- struct ubd *dev = hctx->queue->queuedata;
struct io_thread_req *io_req;
- int ret;
+ int i;
- io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);
+ io_req = kmalloc(sizeof(*io_req) +
+ (desc_cnt * sizeof(struct io_desc)),
+ GFP_ATOMIC);
if (!io_req)
- return -ENOMEM;
+ return NULL;
io_req->req = req;
if (dev->cow.file)
@@ -1327,26 +1413,41 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
else
io_req->fds[0] = dev->fd;
io_req->error = 0;
-
- if (bvec != NULL) {
- io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
- io_req->length = bvec->bv_len;
- } else {
- io_req->buffer = NULL;
- io_req->length = blk_rq_bytes(req);
- }
-
io_req->sectorsize = SECTOR_SIZE;
io_req->fds[1] = dev->fd;
- io_req->cow_offset = -1;
- io_req->offset = off;
- io_req->sector_mask = 0;
+ io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
io_req->offsets[0] = 0;
io_req->offsets[1] = dev->cow.data_offset;
- if (dev->cow.file)
- cowify_req(io_req, dev->cow.bitmap,
- dev->cow.bitmap_offset, dev->cow.bitmap_len);
+ for (i = 0 ; i < desc_cnt; i++) {
+ io_req->io_desc[i].sector_mask = 0;
+ io_req->io_desc[i].cow_offset = -1;
+ }
+
+ return io_req;
+}
+
+static int ubd_submit_request(struct ubd *dev, struct request *req)
+{
+ int segs = 0;
+ struct io_thread_req *io_req;
+ int ret;
+ int op = req_op(req);
+
+ if (op == REQ_OP_FLUSH)
+ segs = 0;
+ else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
+ segs = 1;
+ else
+ segs = blk_rq_nr_phys_segments(req);
+
+ io_req = ubd_alloc_req(dev, req, segs);
+ if (!io_req)
+ return -ENOMEM;
+
+ io_req->desc_cnt = segs;
+ if (segs)
+ ubd_map_req(dev, io_req, req);
ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
if (ret != sizeof(io_req)) {
@@ -1357,22 +1458,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
return ret;
}
-static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req)
-{
- struct req_iterator iter;
- struct bio_vec bvec;
- int ret;
- u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
-
- rq_for_each_segment(bvec, req, iter) {
- ret = ubd_queue_one_vec(hctx, req, off, &bvec);
- if (ret < 0)
- return ret;
- off += bvec.bv_len;
- }
- return 0;
-}
-
static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -1385,17 +1470,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_lock_irq(&ubd_dev->lock);
switch (req_op(req)) {
- /* operations with no lentgth/offset arguments */
case REQ_OP_FLUSH:
- ret = ubd_queue_one_vec(hctx, req, 0, NULL);
- break;
case REQ_OP_READ:
case REQ_OP_WRITE:
- ret = queue_rw_req(hctx, req);
- break;
case REQ_OP_DISCARD:
case REQ_OP_WRITE_ZEROES:
- ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+ ret = ubd_submit_request(ubd_dev, req);
break;
default:
WARN_ON_ONCE(1);
@@ -1483,22 +1563,22 @@ static int map_error(int error_code)
* will result in unpredictable behaviour and/or crashes.
*/
-static int update_bitmap(struct io_thread_req *req)
+static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
{
int n;
- if(req->cow_offset == -1)
+ if (segment->cow_offset == -1)
return map_error(0);
- n = os_pwrite_file(req->fds[1], &req->bitmap_words,
- sizeof(req->bitmap_words), req->cow_offset);
- if (n != sizeof(req->bitmap_words))
+ n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
+ sizeof(segment->bitmap_words), segment->cow_offset);
+ if (n != sizeof(segment->bitmap_words))
return map_error(-n);
return map_error(0);
}
-static void do_io(struct io_thread_req *req)
+static void do_io(struct io_thread_req *req, struct io_desc *desc)
{
char *buf = NULL;
unsigned long len;
@@ -1513,21 +1593,20 @@ static void do_io(struct io_thread_req *req)
return;
}
- nsectors = req->length / req->sectorsize;
+ nsectors = desc->length / req->sectorsize;
start = 0;
do {
- bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
+ bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
end = start;
while((end < nsectors) &&
- (ubd_test_bit(end, (unsigned char *)
- &req->sector_mask) == bit))
+ (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
end++;
off = req->offset + req->offsets[bit] +
start * req->sectorsize;
len = (end - start) * req->sectorsize;
- if (req->buffer != NULL)
- buf = &req->buffer[start * req->sectorsize];
+ if (desc->buffer != NULL)
+ buf = &desc->buffer[start * req->sectorsize];
switch (req_op(req->req)) {
case REQ_OP_READ:
@@ -1567,7 +1646,8 @@ static void do_io(struct io_thread_req *req)
start = end;
} while(start < nsectors);
- req->error = update_bitmap(req);
+ req->offset += len;
+ req->error = update_bitmap(req, desc);
}
/* Changed in start_io_thread, which is serialized by being called only
@@ -1600,8 +1680,13 @@ int io_thread(void *arg)
}
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
+ struct io_thread_req *req = (*io_req_buffer)[count];
+ int i;
+
io_count++;
- do_io((*io_req_buffer)[count]);
+ for (i = 0; !req->error && i < req->desc_cnt; i++)
+ do_io(req, &(req->io_desc[i]));
+
}
written = 0;
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 555203e3e7b4..47a02e60898d 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1196,9 +1196,9 @@ static int vector_net_close(struct net_device *dev)
/* TX tasklet */
-static void vector_tx_poll(unsigned long data)
+static void vector_tx_poll(struct tasklet_struct *t)
{
- struct vector_private *vp = (struct vector_private *)data;
+ struct vector_private *vp = from_tasklet(vp, t, tx_poll);
vp->estats.tx_kicks++;
vector_send(vp->tx_queue);
@@ -1271,7 +1271,7 @@ static int vector_net_open(struct net_device *dev)
irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
IRQ_READ, vector_rx_interrupt,
IRQF_SHARED, dev->name, dev);
- if (err != 0) {
+ if (err < 0) {
netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
err = -ENETUNREACH;
goto out_close;
@@ -1286,7 +1286,7 @@ static int vector_net_open(struct net_device *dev)
irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
IRQ_WRITE, vector_tx_interrupt,
IRQF_SHARED, dev->name, dev);
- if (err != 0) {
+ if (err < 0) {
netdev_err(dev,
"vector_open: failed to get tx irq(%d)\n", err);
err = -ENETUNREACH;
@@ -1629,7 +1629,7 @@ static void vector_eth_configure(
});
dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
- tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
+ tasklet_setup(&vp->tx_poll, vector_tx_poll);
INIT_WORK(&vp->reset_tx, vector_reset_tx);
timer_setup(&vp->tl, vector_timer_expire, 0);
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index a6c4bb6c2c01..27e92d3881ff 100644
--- a/arch/um/drivers/virtio_uml.c
+++ b/arch/um/drivers/virtio_uml.c
@@ -33,11 +33,6 @@
#include <os.h>
#include "vhost_user.h"
-/* Workaround due to a conflict between irq_user.h and irqreturn.h */
-#ifdef IRQ_NONE
-#undef IRQ_NONE
-#endif
-
#define MAX_SUPPORTED_QUEUE_SIZE 256
#define to_virtio_uml_device(_vdev) \
@@ -55,7 +50,7 @@ struct virtio_uml_device {
struct platform_device *pdev;
spinlock_t sock_lock;
- int sock, req_fd;
+ int sock, req_fd, irq;
u64 features;
u64 protocol_features;
u8 status;
@@ -409,12 +404,14 @@ static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
return rc;
vu_dev->req_fd = req_fds[0];
- rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
+ rc = um_request_irq(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
vu_req_interrupt, IRQF_SHARED,
vu_dev->pdev->name, vu_dev);
- if (rc)
+ if (rc < 0)
goto err_close;
+ vu_dev->irq = rc;
+
rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
req_fds[1]);
if (rc)
@@ -423,7 +420,7 @@ static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
goto out;
err_free_irq:
- um_free_irq(VIRTIO_IRQ, vu_dev);
+ um_free_irq(vu_dev->irq, vu_dev);
err_close:
os_close_file(req_fds[0]);
out:
@@ -802,7 +799,11 @@ static void vu_del_vq(struct virtqueue *vq)
struct virtio_uml_vq_info *info = vq->priv;
if (info->call_fd >= 0) {
- um_free_irq(VIRTIO_IRQ, vq);
+ struct virtio_uml_device *vu_dev;
+
+ vu_dev = to_virtio_uml_device(vq->vdev);
+
+ um_free_irq(vu_dev->irq, vq);
os_close_file(info->call_fd);
}
@@ -852,9 +853,9 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
return rc;
info->call_fd = call_fds[0];
- rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
+ rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
vu_interrupt, IRQF_SHARED, info->name, vq);
- if (rc)
+ if (rc < 0)
goto close_both;
rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
@@ -864,7 +865,7 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
goto out;
release_irq:
- um_free_irq(VIRTIO_IRQ, vq);
+ um_free_irq(vu_dev->irq, vq);
close_both:
os_close_file(call_fds[0]);
out:
@@ -969,7 +970,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
error_setup:
if (info->call_fd >= 0) {
- um_free_irq(VIRTIO_IRQ, vq);
+ um_free_irq(vu_dev->irq, vq);
os_close_file(info->call_fd);
}
error_call:
@@ -1078,7 +1079,7 @@ static void virtio_uml_release_dev(struct device *d)
/* might not have been opened due to not negotiating the feature */
if (vu_dev->req_fd >= 0) {
- um_free_irq(VIRTIO_IRQ, vu_dev);
+ um_free_irq(vu_dev->irq, vu_dev);
os_close_file(vu_dev->req_fd);
}
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index fc7f1e746703..87ca4a47cd66 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -18,6 +18,7 @@
struct xterm_chan {
int pid;
int helper_pid;
+ int chan_fd;
char *title;
int device;
int raw;
@@ -33,6 +34,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts)
return NULL;
*data = ((struct xterm_chan) { .pid = -1,
.helper_pid = -1,
+ .chan_fd = -1,
.device = device,
.title = opts->xterm_title,
.raw = opts->raw } );
@@ -149,6 +151,7 @@ static int xterm_open(int input, int output, int primary, void *d,
goto out_kill;
}
+ data->chan_fd = fd;
new = xterm_fd(fd, &data->helper_pid);
if (new < 0) {
err = new;
@@ -206,6 +209,8 @@ static void xterm_close(int fd, void *d)
os_kill_process(data->helper_pid, 0);
data->helper_pid = -1;
+ if (data->chan_fd != -1)
+ os_close_file(data->chan_fd);
os_close_file(fd);
}
diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c
index d64ef6d0d463..50f11b7b4774 100644
--- a/arch/um/drivers/xterm_kern.c
+++ b/arch/um/drivers/xterm_kern.c
@@ -51,7 +51,7 @@ int xterm_fd(int socket, int *pid_out)
err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt,
IRQF_SHARED, "xterm", data);
- if (err) {
+ if (err < 0) {
printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, "
"err = %d\n", err);
ret = err;
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index 42c6205e2dc4..547bff7b3a89 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -17,21 +17,20 @@
#define TELNETD_IRQ 12
#define XTERM_IRQ 13
#define RANDOM_IRQ 14
-#define VIRTIO_IRQ 15
#ifdef CONFIG_UML_NET_VECTOR
-#define VECTOR_BASE_IRQ (VIRTIO_IRQ + 1)
+#define VECTOR_BASE_IRQ (RANDOM_IRQ + 1)
#define VECTOR_IRQ_SPACE 8
-#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
+#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
#else
-#define LAST_IRQ VIRTIO_IRQ
+#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1)
#endif
-#define NR_IRQS (LAST_IRQ + 1)
+#define NR_IRQS 64
#endif
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index def376194dce..39376bb63abf 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -55,12 +55,15 @@ extern unsigned long end_iomem;
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
#define __PAGE_KERNEL_EXEC \
(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_RO \
+ (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
/*
* The i386 can't do page protection for execute, and considers that the same
diff --git a/arch/um/include/asm/set_memory.h b/arch/um/include/asm/set_memory.h
new file mode 100644
index 000000000000..24266c63720d
--- /dev/null
+++ b/arch/um/include/asm/set_memory.h
@@ -0,0 +1 @@
+#include <asm-generic/set_memory.h>
diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h
index f3b03d39a854..68e45e950137 100644
--- a/arch/um/include/linux/time-internal.h
+++ b/arch/um/include/linux/time-internal.h
@@ -28,7 +28,7 @@ struct time_travel_event {
extern enum time_travel_mode time_travel_mode;
-void time_travel_sleep(unsigned long long duration);
+void time_travel_sleep(void);
static inline void
time_travel_set_event_fn(struct time_travel_event *e,
@@ -60,7 +60,7 @@ struct time_travel_event {
#define time_travel_mode TT_MODE_OFF
-static inline void time_travel_sleep(unsigned long long duration)
+static inline void time_travel_sleep(void)
{
}
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 4e99fe05576a..16a51a8c800f 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -40,3 +40,6 @@ DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86);
#ifdef CONFIG_64BIT
DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
#endif
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
+#endif
diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h
index 7cd1a10c6244..7807de593bda 100644
--- a/arch/um/include/shared/irq_kern.h
+++ b/arch/um/include/shared/irq_kern.h
@@ -8,11 +8,12 @@
#include <linux/interrupt.h>
#include <asm/ptrace.h>
+#include "irq_user.h"
-extern int um_request_irq(unsigned int irq, int fd, int type,
- irq_handler_t handler,
- unsigned long irqflags, const char * devname,
- void *dev_id);
-void um_free_irq(unsigned int irq, void *dev);
-#endif
+#define UM_IRQ_ALLOC -1
+int um_request_irq(int irq, int fd, enum um_irq_type type,
+ irq_handler_t handler, unsigned long irqflags,
+ const char *devname, void *dev_id);
+void um_free_irq(int irq, void *dev_id);
+#endif
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index 107751dce153..07239e801a5b 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -9,25 +9,12 @@
#include <sysdep/ptrace.h>
#include <stdbool.h>
-struct irq_fd {
- struct irq_fd *next;
- void *id;
- int fd;
- int type;
- int irq;
- int events;
- bool active;
- bool pending;
- bool purge;
+enum um_irq_type {
+ IRQ_READ,
+ IRQ_WRITE,
+ NUM_IRQ_TYPES,
};
-#define IRQ_READ 0
-#define IRQ_WRITE 1
-#define IRQ_NONE 2
-#define MAX_IRQ_TYPE (IRQ_NONE + 1)
-
-
-
struct siginfo;
extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
extern void free_irq_by_fd(int fd);
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index ccafb62e8cce..d8c279e3312f 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -19,7 +19,7 @@ extern int kmalloc_ok;
#define UML_ROUND_UP(addr) \
((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK)
-extern unsigned long alloc_stack(int order, int atomic);
+extern unsigned long alloc_stack(int atomic);
extern void free_stack(unsigned long stack, int order);
struct pt_regs;
@@ -39,6 +39,8 @@ extern int is_syscall(unsigned long addr);
extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
+extern void uml_pm_wake(void);
+
extern int start_uml(void);
extern void paging_init(void);
@@ -66,5 +68,6 @@ extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs);
extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
extern void fatal_sigsegv(void) __attribute__ ((noreturn));
+void um_idle_sleep(void);
#endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index f467d28fc0b4..13d86f94cf0f 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -233,6 +233,7 @@ extern void timer_set_signal_handler(void);
extern void set_sigstack(void *sig_stack, int size);
extern void remove_sigstack(void);
extern void set_handler(int sig);
+extern void send_sigio_to_self(void);
extern int change_sig(int signal, int on);
extern void block_signals(void);
extern void unblock_signals(void);
@@ -241,6 +242,7 @@ extern int set_signals(int enable);
extern int set_signals_trace(int enable);
extern int os_is_signal_stack(void);
extern void deliver_alarm(void);
+extern void register_pm_wake_signal(void);
/* util.c */
extern void stack_protections(unsigned long address);
@@ -256,7 +258,7 @@ extern void os_warn(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
/* time.c */
-extern void os_idle_sleep(unsigned long long nsecs);
+extern void os_idle_sleep(void);
extern int os_timer_create(void);
extern int os_timer_set_interval(unsigned long long nsecs);
extern int os_timer_one_shot(unsigned long long nsecs);
@@ -299,19 +301,29 @@ extern void reboot_skas(void);
extern int os_waiting_for_events_epoll(void);
extern void *os_epoll_get_data_pointer(int index);
extern int os_epoll_triggered(int index, int events);
-extern int os_event_mask(int irq_type);
+extern int os_event_mask(enum um_irq_type irq_type);
extern int os_setup_epoll(void);
extern int os_add_epoll_fd(int events, int fd, void *data);
extern int os_mod_epoll_fd(int events, int fd, void *data);
extern int os_del_epoll_fd(int fd);
extern void os_set_ioignore(void);
extern void os_close_epoll_fd(void);
+extern void um_irqs_suspend(void);
+extern void um_irqs_resume(void);
/* sigio.c */
extern int add_sigio_fd(int fd);
extern int ignore_sigio_fd(int fd);
-extern void maybe_sigio_broken(int fd, int read);
-extern void sigio_broken(int fd, int read);
+extern void maybe_sigio_broken(int fd);
+extern void sigio_broken(int fd);
+/*
+ * unlocked versions for IRQ controller code.
+ *
+ * This is safe because it's used at suspend/resume and nothing
+ * else is running.
+ */
+extern int __add_sigio_fd(int fd);
+extern int __ignore_sigio_fd(int fd);
/* prctl.c */
extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
@@ -330,4 +342,7 @@ extern void unblock_signals_trace(void);
extern void um_trace_signals_on(void);
extern void um_trace_signals_off(void);
+/* time-travel */
+extern void deliver_time_travel_irqs(void);
+
#endif
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 3577118bb4a5..3741d2380060 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -19,27 +19,40 @@
#include <kern_util.h>
#include <os.h>
#include <irq_user.h>
+#include <irq_kern.h>
+#include <as-layout.h>
extern void free_irqs(void);
/* When epoll triggers we do not know why it did so
* we can also have different IRQs for read and write.
- * This is why we keep a small irq_fd array for each fd -
+ * This is why we keep a small irq_reg array for each fd -
* one entry per IRQ type
*/
+struct irq_reg {
+ void *id;
+ int irq;
+ /* it's cheaper to store this than to query it */
+ int events;
+ bool active;
+ bool pending;
+ bool wakeup;
+};
struct irq_entry {
- struct irq_entry *next;
+ struct list_head list;
int fd;
- struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
+ struct irq_reg reg[NUM_IRQ_TYPES];
+ bool suspended;
+ bool sigio_workaround;
};
-static struct irq_entry *active_fds;
-
static DEFINE_SPINLOCK(irq_lock);
+static LIST_HEAD(active_fds);
+static DECLARE_BITMAP(irqs_allocated, NR_IRQS);
-static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
+static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{
/*
* irq->active guards against reentry
@@ -49,23 +62,27 @@ static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
*/
if (irq->active) {
irq->active = false;
+
do {
irq->pending = false;
do_IRQ(irq->irq, regs);
- } while (irq->pending && (!irq->purge));
- if (!irq->purge)
- irq->active = true;
+ } while (irq->pending);
+
+ irq->active = true;
} else {
irq->pending = true;
}
}
+void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+{
+ /* nothing */
+}
+
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
struct irq_entry *irq_entry;
- struct irq_fd *irq;
-
- int n, i, j;
+ int n, i;
while (1) {
/* This is now lockless - epoll keeps back-referencesto the irqs
@@ -84,21 +101,18 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
}
for (i = 0; i < n ; i++) {
- /* Epoll back reference is the entry with 3 irq_fd
- * leaves - one for each irq type.
- */
- irq_entry = (struct irq_entry *)
- os_epoll_get_data_pointer(i);
- for (j = 0; j < MAX_IRQ_TYPE ; j++) {
- irq = irq_entry->irq_array[j];
- if (irq == NULL)
+ enum um_irq_type t;
+
+ irq_entry = os_epoll_get_data_pointer(i);
+
+ for (t = 0; t < NUM_IRQ_TYPES; t++) {
+ int events = irq_entry->reg[t].events;
+
+ if (!events)
continue;
- if (os_epoll_triggered(i, irq->events) > 0)
- irq_io_loop(irq, regs);
- if (irq->purge) {
- irq_entry->irq_array[j] = NULL;
- kfree(irq);
- }
+
+ if (os_epoll_triggered(i, events) > 0)
+ irq_io_loop(&irq_entry->reg[t], regs);
}
}
}
@@ -106,32 +120,59 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
free_irqs();
}
-static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
+static struct irq_entry *get_irq_entry_by_fd(int fd)
{
- int i;
- int events = 0;
- struct irq_fd *irq;
+ struct irq_entry *walk;
- for (i = 0; i < MAX_IRQ_TYPE ; i++) {
- irq = irq_entry->irq_array[i];
- if (irq != NULL)
- events = irq->events | events;
- }
- if (events > 0) {
- /* os_add_epoll will call os_mod_epoll if this already exists */
- return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+ lockdep_assert_held(&irq_lock);
+
+ list_for_each_entry(walk, &active_fds, list) {
+ if (walk->fd == fd)
+ return walk;
}
- /* No events - delete */
- return os_del_epoll_fd(irq_entry->fd);
+
+ return NULL;
}
+static void free_irq_entry(struct irq_entry *to_free, bool remove)
+{
+ if (!to_free)
+ return;
+
+ if (remove)
+ os_del_epoll_fd(to_free->fd);
+ list_del(&to_free->list);
+ kfree(to_free);
+}
+
+static bool update_irq_entry(struct irq_entry *entry)
+{
+ enum um_irq_type i;
+ int events = 0;
+
+ for (i = 0; i < NUM_IRQ_TYPES; i++)
+ events |= entry->reg[i].events;
+
+ if (events) {
+ /* will modify (instead of add) if needed */
+ os_add_epoll_fd(events, entry->fd, entry);
+ return true;
+ }
+
+ os_del_epoll_fd(entry->fd);
+ return false;
+}
+static void update_or_free_irq_entry(struct irq_entry *entry)
+{
+ if (!update_irq_entry(entry))
+ free_irq_entry(entry, false);
+}
-static int activate_fd(int irq, int fd, int type, void *dev_id)
+static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
{
- struct irq_fd *new_fd;
struct irq_entry *irq_entry;
- int i, err, events;
+ int err, events = os_event_mask(type);
unsigned long flags;
err = os_set_fd_async(fd);
@@ -139,73 +180,34 @@ static int activate_fd(int irq, int fd, int type, void *dev_id)
goto out;
spin_lock_irqsave(&irq_lock, flags);
-
- /* Check if we have an entry for this fd */
-
- err = -EBUSY;
- for (irq_entry = active_fds;
- irq_entry != NULL; irq_entry = irq_entry->next) {
- if (irq_entry->fd == fd)
- break;
- }
-
- if (irq_entry == NULL) {
- /* This needs to be atomic as it may be called from an
- * IRQ context.
- */
- irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
- if (irq_entry == NULL) {
- printk(KERN_ERR
- "Failed to allocate new IRQ entry\n");
+ irq_entry = get_irq_entry_by_fd(fd);
+ if (irq_entry) {
+ /* cannot register the same FD twice with the same type */
+ if (WARN_ON(irq_entry->reg[type].events)) {
+ err = -EALREADY;
goto out_unlock;
}
- irq_entry->fd = fd;
- for (i = 0; i < MAX_IRQ_TYPE; i++)
- irq_entry->irq_array[i] = NULL;
- irq_entry->next = active_fds;
- active_fds = irq_entry;
- }
-
- /* Check if we are trying to re-register an interrupt for a
- * particular fd
- */
- if (irq_entry->irq_array[type] != NULL) {
- printk(KERN_ERR
- "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
- irq, fd, type, dev_id
- );
- goto out_unlock;
+ /* temporarily disable to avoid IRQ-side locking */
+ os_del_epoll_fd(fd);
} else {
- /* New entry for this fd */
-
- err = -ENOMEM;
- new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
- if (new_fd == NULL)
+ irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
+ if (!irq_entry) {
+ err = -ENOMEM;
goto out_unlock;
-
- events = os_event_mask(type);
-
- *new_fd = ((struct irq_fd) {
- .id = dev_id,
- .irq = irq,
- .type = type,
- .events = events,
- .active = true,
- .pending = false,
- .purge = false
- });
- /* Turn off any IO on this fd - allows us to
- * avoid locking the IRQ loop
- */
- os_del_epoll_fd(irq_entry->fd);
- irq_entry->irq_array[type] = new_fd;
+ }
+ irq_entry->fd = fd;
+ list_add_tail(&irq_entry->list, &active_fds);
+ maybe_sigio_broken(fd);
}
- /* Turn back IO on with the correct (new) IO event mask */
- assign_epoll_events_to_irq(irq_entry);
+ irq_entry->reg[type].id = dev_id;
+ irq_entry->reg[type].irq = irq;
+ irq_entry->reg[type].active = true;
+ irq_entry->reg[type].events = events;
+
+ WARN_ON(!update_irq_entry(irq_entry));
spin_unlock_irqrestore(&irq_lock, flags);
- maybe_sigio_broken(fd, (type != IRQ_NONE));
return 0;
out_unlock:
@@ -215,104 +217,10 @@ out:
}
/*
- * Walk the IRQ list and dispose of any unused entries.
- * Should be done under irq_lock.
+ * Remove the entry or entries for a specific FD, if you
+ * don't want to remove all the possible entries then use
+ * um_free_irq() or deactivate_fd() instead.
*/
-
-static void garbage_collect_irq_entries(void)
-{
- int i;
- bool reap;
- struct irq_entry *walk;
- struct irq_entry *previous = NULL;
- struct irq_entry *to_free;
-
- if (active_fds == NULL)
- return;
- walk = active_fds;
- while (walk != NULL) {
- reap = true;
- for (i = 0; i < MAX_IRQ_TYPE ; i++) {
- if (walk->irq_array[i] != NULL) {
- reap = false;
- break;
- }
- }
- if (reap) {
- if (previous == NULL)
- active_fds = walk->next;
- else
- previous->next = walk->next;
- to_free = walk;
- } else {
- to_free = NULL;
- }
- walk = walk->next;
- kfree(to_free);
- }
-}
-
-/*
- * Walk the IRQ list and get the descriptor for our FD
- */
-
-static struct irq_entry *get_irq_entry_by_fd(int fd)
-{
- struct irq_entry *walk = active_fds;
-
- while (walk != NULL) {
- if (walk->fd == fd)
- return walk;
- walk = walk->next;
- }
- return NULL;
-}
-
-
-/*
- * Walk the IRQ list and dispose of an entry for a specific
- * device, fd and number. Note - if sharing an IRQ for read
- * and writefor the same FD it will be disposed in either case.
- * If this behaviour is undesirable use different IRQ ids.
- */
-
-#define IGNORE_IRQ 1
-#define IGNORE_DEV (1<<1)
-
-static void do_free_by_irq_and_dev(
- struct irq_entry *irq_entry,
- unsigned int irq,
- void *dev,
- int flags
-)
-{
- int i;
- struct irq_fd *to_free;
-
- for (i = 0; i < MAX_IRQ_TYPE ; i++) {
- if (irq_entry->irq_array[i] != NULL) {
- if (
- ((flags & IGNORE_IRQ) ||
- (irq_entry->irq_array[i]->irq == irq)) &&
- ((flags & IGNORE_DEV) ||
- (irq_entry->irq_array[i]->id == dev))
- ) {
- /* Turn off any IO on this fd - allows us to
- * avoid locking the IRQ loop
- */
- os_del_epoll_fd(irq_entry->fd);
- to_free = irq_entry->irq_array[i];
- irq_entry->irq_array[i] = NULL;
- assign_epoll_events_to_irq(irq_entry);
- if (to_free->active)
- to_free->purge = true;
- else
- kfree(to_free);
- }
- }
- }
-}
-
void free_irq_by_fd(int fd)
{
struct irq_entry *to_free;
@@ -320,58 +228,64 @@ void free_irq_by_fd(int fd)
spin_lock_irqsave(&irq_lock, flags);
to_free = get_irq_entry_by_fd(fd);
- if (to_free != NULL) {
- do_free_by_irq_and_dev(
- to_free,
- -1,
- NULL,
- IGNORE_IRQ | IGNORE_DEV
- );
- }
- garbage_collect_irq_entries();
+ free_irq_entry(to_free, true);
spin_unlock_irqrestore(&irq_lock, flags);
}
EXPORT_SYMBOL(free_irq_by_fd);
static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
- struct irq_entry *to_free;
+ struct irq_entry *entry;
unsigned long flags;
spin_lock_irqsave(&irq_lock, flags);
- to_free = active_fds;
- while (to_free != NULL) {
- do_free_by_irq_and_dev(
- to_free,
- irq,
- dev,
- 0
- );
- to_free = to_free->next;
+ list_for_each_entry(entry, &active_fds, list) {
+ enum um_irq_type i;
+
+ for (i = 0; i < NUM_IRQ_TYPES; i++) {
+ struct irq_reg *reg = &entry->reg[i];
+
+ if (!reg->events)
+ continue;
+ if (reg->irq != irq)
+ continue;
+ if (reg->id != dev)
+ continue;
+
+ os_del_epoll_fd(entry->fd);
+ reg->events = 0;
+ update_or_free_irq_entry(entry);
+ goto out;
+ }
}
- garbage_collect_irq_entries();
+out:
spin_unlock_irqrestore(&irq_lock, flags);
}
-
void deactivate_fd(int fd, int irqnum)
{
- struct irq_entry *to_free;
+ struct irq_entry *entry;
unsigned long flags;
+ enum um_irq_type i;
os_del_epoll_fd(fd);
+
spin_lock_irqsave(&irq_lock, flags);
- to_free = get_irq_entry_by_fd(fd);
- if (to_free != NULL) {
- do_free_by_irq_and_dev(
- to_free,
- irqnum,
- NULL,
- IGNORE_DEV
- );
+ entry = get_irq_entry_by_fd(fd);
+ if (!entry)
+ goto out;
+
+ for (i = 0; i < NUM_IRQ_TYPES; i++) {
+ if (!entry->reg[i].events)
+ continue;
+ if (entry->reg[i].irq == irqnum)
+ entry->reg[i].events = 0;
}
- garbage_collect_irq_entries();
+
+ update_or_free_irq_entry(entry);
+out:
spin_unlock_irqrestore(&irq_lock, flags);
+
ignore_sigio_fd(fd);
}
EXPORT_SYMBOL(deactivate_fd);
@@ -384,24 +298,17 @@ EXPORT_SYMBOL(deactivate_fd);
*/
int deactivate_all_fds(void)
{
- struct irq_entry *to_free;
+ struct irq_entry *entry;
/* Stop IO. The IRQ loop has no lock so this is our
* only way of making sure we are safe to dispose
* of all IRQ handlers
*/
os_set_ioignore();
- to_free = active_fds;
- while (to_free != NULL) {
- do_free_by_irq_and_dev(
- to_free,
- -1,
- NULL,
- IGNORE_IRQ | IGNORE_DEV
- );
- to_free = to_free->next;
- }
- /* don't garbage collect - we can no longer call kfree() here */
+
+ /* we can no longer call kfree() here so just deactivate */
+ list_for_each_entry(entry, &active_fds, list)
+ os_del_epoll_fd(entry->fd);
os_close_epoll_fd();
return 0;
}
@@ -421,31 +328,146 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
return 1;
}
-void um_free_irq(unsigned int irq, void *dev)
+void um_free_irq(int irq, void *dev)
{
+ if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq))
+ return;
+
free_irq_by_irq_and_dev(irq, dev);
free_irq(irq, dev);
+ clear_bit(irq, irqs_allocated);
}
EXPORT_SYMBOL(um_free_irq);
-int um_request_irq(unsigned int irq, int fd, int type,
- irq_handler_t handler,
- unsigned long irqflags, const char * devname,
- void *dev_id)
+int um_request_irq(int irq, int fd, enum um_irq_type type,
+ irq_handler_t handler, unsigned long irqflags,
+ const char *devname, void *dev_id)
{
int err;
+ if (irq == UM_IRQ_ALLOC) {
+ int i;
+
+ for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
+ if (!test_and_set_bit(i, irqs_allocated)) {
+ irq = i;
+ break;
+ }
+ }
+ }
+
+ if (irq < 0)
+ return -ENOSPC;
+
if (fd != -1) {
err = activate_fd(irq, fd, type, dev_id);
if (err)
- return err;
+ goto error;
}
- return request_irq(irq, handler, irqflags, devname, dev_id);
-}
+ err = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (err < 0)
+ goto error;
+ return irq;
+error:
+ clear_bit(irq, irqs_allocated);
+ return err;
+}
EXPORT_SYMBOL(um_request_irq);
+#ifdef CONFIG_PM_SLEEP
+void um_irqs_suspend(void)
+{
+ struct irq_entry *entry;
+ unsigned long flags;
+
+ sig_info[SIGIO] = sigio_handler_suspend;
+
+ spin_lock_irqsave(&irq_lock, flags);
+ list_for_each_entry(entry, &active_fds, list) {
+ enum um_irq_type t;
+ bool wake = false;
+
+ for (t = 0; t < NUM_IRQ_TYPES; t++) {
+ if (!entry->reg[t].events)
+ continue;
+
+ /*
+ * For the SIGIO_WRITE_IRQ, which is used to handle the
+ * SIGIO workaround thread, we need special handling:
+ * enable wake for it itself, but below we tell it about
+ * any FDs that should be suspended.
+ */
+ if (entry->reg[t].wakeup ||
+ entry->reg[t].irq == SIGIO_WRITE_IRQ) {
+ wake = true;
+ break;
+ }
+ }
+
+ if (!wake) {
+ entry->suspended = true;
+ os_clear_fd_async(entry->fd);
+ entry->sigio_workaround =
+ !__ignore_sigio_fd(entry->fd);
+ }
+ }
+ spin_unlock_irqrestore(&irq_lock, flags);
+}
+
+void um_irqs_resume(void)
+{
+ struct irq_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq_lock, flags);
+ list_for_each_entry(entry, &active_fds, list) {
+ if (entry->suspended) {
+ int err = os_set_fd_async(entry->fd);
+
+ WARN(err < 0, "os_set_fd_async returned %d\n", err);
+ entry->suspended = false;
+
+ if (entry->sigio_workaround) {
+ err = __add_sigio_fd(entry->fd);
+ WARN(err < 0, "add_sigio_returned %d\n", err);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&irq_lock, flags);
+
+ sig_info[SIGIO] = sigio_handler;
+ send_sigio_to_self();
+}
+
+static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
+{
+ struct irq_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq_lock, flags);
+ list_for_each_entry(entry, &active_fds, list) {
+ enum um_irq_type t;
+
+ for (t = 0; t < NUM_IRQ_TYPES; t++) {
+ if (!entry->reg[t].events)
+ continue;
+
+ if (entry->reg[t].irq != d->irq)
+ continue;
+ entry->reg[t].wakeup = on;
+ goto unlock;
+ }
+ }
+unlock:
+ spin_unlock_irqrestore(&irq_lock, flags);
+ return 0;
+}
+#else
+#define normal_irq_set_wake NULL
+#endif
+
/*
* irq_chip must define at least enable/disable and ack when
* the edge handler is used.
@@ -454,7 +476,7 @@ static void dummy(struct irq_data *d)
{
}
-/* This is used for everything else than the timer. */
+/* This is used for everything other than the timer. */
static struct irq_chip normal_irq_type = {
.name = "SIGIO",
.irq_disable = dummy,
@@ -462,10 +484,11 @@ static struct irq_chip normal_irq_type = {
.irq_ack = dummy,
.irq_mask = dummy,
.irq_unmask = dummy,
+ .irq_set_wake = normal_irq_set_wake,
};
-static struct irq_chip SIGVTALRM_irq_type = {
- .name = "SIGVTALRM",
+static struct irq_chip alarm_irq_type = {
+ .name = "SIGALRM",
.irq_disable = dummy,
.irq_enable = dummy,
.irq_ack = dummy,
@@ -477,10 +500,9 @@ void __init init_IRQ(void)
{
int i;
- irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
-
+ irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
- for (i = 1; i <= LAST_IRQ; i++)
+ for (i = 1; i < NR_IRQS; i++)
irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
/* Initialize EPOLL Loop */
os_setup_epoll();
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 0fcdc374a9a1..2a986ece5478 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -32,6 +32,7 @@
#include <os.h>
#include <skas.h>
#include <linux/time-internal.h>
+#include <asm/set_memory.h>
/*
* This is a per-cpu array. A processor only modifies its entry and it only
@@ -62,16 +63,18 @@ void free_stack(unsigned long stack, int order)
free_pages(stack, order);
}
-unsigned long alloc_stack(int order, int atomic)
+unsigned long alloc_stack(int atomic)
{
- unsigned long page;
+ unsigned long addr;
gfp_t flags = GFP_KERNEL;
if (atomic)
flags = GFP_ATOMIC;
- page = __get_free_pages(flags, order);
+ addr = __get_free_pages(flags, 1);
- return page;
+ set_memory_ro(addr, 1);
+
+ return addr + PAGE_SIZE;
}
static inline void set_current(struct task_struct *task)
@@ -203,15 +206,12 @@ void initial_thread_cb(void (*proc)(void *), void *arg)
kmalloc_ok = save_kmalloc_ok;
}
-static void um_idle_sleep(void)
+void um_idle_sleep(void)
{
- unsigned long long duration = UM_NSEC_PER_SEC;
-
- if (time_travel_mode != TT_MODE_OFF) {
- time_travel_sleep(duration);
- } else {
- os_idle_sleep(duration);
- }
+ if (time_travel_mode != TT_MODE_OFF)
+ time_travel_sleep();
+ else
+ os_idle_sleep();
}
void arch_cpu_idle(void)
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index d1cffc2a7f21..5085a50c3b8c 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -25,7 +25,7 @@ int write_sigio_irq(int fd)
err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt,
0, "write sigio", NULL);
- if (err) {
+ if (err < 0) {
printk(KERN_ERR "write_sigio_irq : um_request_irq failed, "
"err = %d\n", err);
return -1;
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 3d109ff3309b..f4db89b5b5a6 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -31,6 +31,7 @@ static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
static LIST_HEAD(time_travel_events);
+static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
static unsigned long long time_travel_next_event;
static struct time_travel_event time_travel_timer_event;
@@ -46,6 +47,9 @@ static void time_travel_set_time(unsigned long long ns)
if (unlikely(ns < time_travel_time))
panic("time-travel: time goes backwards %lld -> %lld\n",
time_travel_time, ns);
+ else if (unlikely(ns >= S64_MAX))
+ panic("The system was going to sleep forever, aborting");
+
time_travel_time = ns;
}
@@ -180,6 +184,14 @@ static void time_travel_ext_update_request(unsigned long long time)
time == time_travel_ext_prev_request)
return;
+ /*
+ * if we're running and are allowed to run past the request
+ * then we don't need to update it either
+ */
+ if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
+ time < time_travel_ext_free_until)
+ return;
+
time_travel_ext_prev_request = time;
time_travel_ext_prev_request_valid = true;
time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
@@ -187,7 +199,13 @@ static void time_travel_ext_update_request(unsigned long long time)
void __time_travel_propagate_time(void)
{
+ static unsigned long long last_propagated;
+
+ if (last_propagated == time_travel_time)
+ return;
+
time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
+ last_propagated = time_travel_time;
}
EXPORT_SYMBOL_GPL(__time_travel_propagate_time);
@@ -214,6 +232,7 @@ static void time_travel_ext_wait(bool idle)
};
time_travel_ext_prev_request_valid = false;
+ time_travel_ext_free_until_valid = false;
time_travel_ext_waiting++;
time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
@@ -260,11 +279,6 @@ static void __time_travel_add_event(struct time_travel_event *e,
struct time_travel_event *tmp;
bool inserted = false;
- if (WARN(time_travel_mode == TT_MODE_BASIC &&
- e != &time_travel_timer_event,
- "only timer events can be handled in basic mode"))
- return;
-
if (e->pending)
return;
@@ -311,6 +325,35 @@ void time_travel_periodic_timer(struct time_travel_event *e)
deliver_alarm();
}
+void deliver_time_travel_irqs(void)
+{
+ struct time_travel_event *e;
+ unsigned long flags;
+
+ /*
+ * Don't do anything for most cases. Note that because here we have
+ * to disable IRQs (and re-enable later) we'll actually recurse at
+ * the end of the function, so this is strictly necessary.
+ */
+ if (likely(list_empty(&time_travel_irqs)))
+ return;
+
+ local_irq_save(flags);
+ irq_enter();
+ while ((e = list_first_entry_or_null(&time_travel_irqs,
+ struct time_travel_event,
+ list))) {
+ WARN(e->time != time_travel_time,
+ "time moved from %lld to %lld before IRQ delivery\n",
+ time_travel_time, e->time);
+ list_del(&e->list);
+ e->pending = false;
+ e->fn(e);
+ }
+ irq_exit();
+ local_irq_restore(flags);
+}
+
static void time_travel_deliver_event(struct time_travel_event *e)
{
if (e == &time_travel_timer_event) {
@@ -319,6 +362,14 @@ static void time_travel_deliver_event(struct time_travel_event *e)
* by itself, so must handle it specially here
*/
e->fn(e);
+ } else if (irqs_disabled()) {
+ list_add_tail(&e->list, &time_travel_irqs);
+ /*
+ * set pending again, it was set to false when the
+ * event was deleted from the original list, but
+ * now it's still pending until we deliver the IRQ.
+ */
+ e->pending = true;
} else {
unsigned long flags;
@@ -404,9 +455,14 @@ static void time_travel_oneshot_timer(struct time_travel_event *e)
deliver_alarm();
}
-void time_travel_sleep(unsigned long long duration)
+void time_travel_sleep(void)
{
- unsigned long long next = time_travel_time + duration;
+ /*
+ * Wait "forever" (using S64_MAX because there are some potential
+ * wrapping issues, especially with the current TT_MODE_EXTERNAL
+ * controller application.
+ */
+ unsigned long long next = S64_MAX;
if (time_travel_mode == TT_MODE_BASIC)
os_timer_disable();
@@ -483,6 +539,7 @@ invalid_number:
#define time_travel_start_set 0
#define time_travel_start 0
#define time_travel_time 0
+#define time_travel_ext_waiting 0
static inline void time_travel_update_time(unsigned long long ns, bool retearly)
{
@@ -628,7 +685,8 @@ static u64 timer_read(struct clocksource *cs)
* "what do I do next" and onstack event we use to know when
* to return from time_travel_update_time().
*/
- if (!irqs_disabled() && !in_interrupt() && !in_softirq())
+ if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
+ !time_travel_ext_waiting)
time_travel_update_time(time_travel_time +
TIMER_MULTIPLIER,
false);
@@ -673,10 +731,8 @@ void read_persistent_clock64(struct timespec64 *ts)
{
long long nsecs;
- if (time_travel_start_set)
+ if (time_travel_mode != TT_MODE_OFF)
nsecs = time_travel_start + time_travel_time;
- else if (time_travel_mode == TT_MODE_EXTERNAL)
- nsecs = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
else
nsecs = os_persistent_clock_emulation();
@@ -686,6 +742,25 @@ void read_persistent_clock64(struct timespec64 *ts)
void __init time_init(void)
{
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+ switch (time_travel_mode) {
+ case TT_MODE_EXTERNAL:
+ time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
+ /* controller gave us the *current* time, so adjust by that */
+ time_travel_ext_get_time();
+ time_travel_start -= time_travel_time;
+ break;
+ case TT_MODE_INFCPU:
+ case TT_MODE_BASIC:
+ if (!time_travel_start_set)
+ time_travel_start = os_persistent_clock_emulation();
+ break;
+ case TT_MODE_OFF:
+ /* we just read the host clock with os_persistent_clock_emulation() */
+ break;
+ }
+#endif
+
timer_set_signal_handler();
late_time_init = um_timer_setup;
}
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 61776790cd67..437d1f1cc5ec 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -608,3 +608,57 @@ void force_flush_all(void)
vma = vma->vm_next;
}
}
+
+struct page_change_data {
+ unsigned int set_mask, clear_mask;
+};
+
+static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
+{
+ struct page_change_data *cdata = data;
+ pte_t pte = READ_ONCE(*ptep);
+
+ pte_clear_bits(pte, cdata->clear_mask);
+ pte_set_bits(pte, cdata->set_mask);
+
+ set_pte(ptep, pte);
+ return 0;
+}
+
+static int change_memory(unsigned long start, unsigned long pages,
+ unsigned int set_mask, unsigned int clear_mask)
+{
+ unsigned long size = pages * PAGE_SIZE;
+ struct page_change_data data;
+ int ret;
+
+ data.set_mask = set_mask;
+ data.clear_mask = clear_mask;
+
+ ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+ &data);
+
+ flush_tlb_kernel_range(start, start + size);
+
+ return ret;
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+ return change_memory(addr, numpages, 0, _PAGE_RW);
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+ return change_memory(addr, numpages, _PAGE_RW, 0);
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+ return -EOPNOTSUPP;
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+ return -EOPNOTSUPP;
+}
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 76b37297b7d4..31d356b1ffd8 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -13,6 +13,7 @@
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/kmsg_dump.h>
+#include <linux/suspend.h>
#include <asm/processor.h>
#include <asm/sections.h>
@@ -377,3 +378,69 @@ void *text_poke(void *addr, const void *opcode, size_t len)
void text_poke_sync(void)
{
}
+
+void uml_pm_wake(void)
+{
+ pm_system_wakeup();
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int um_suspend_valid(suspend_state_t state)
+{
+ return state == PM_SUSPEND_MEM;
+}
+
+static int um_suspend_prepare(void)
+{
+ um_irqs_suspend();
+ return 0;
+}
+
+static int um_suspend_enter(suspend_state_t state)
+{
+ if (WARN_ON(state != PM_SUSPEND_MEM))
+ return -EINVAL;
+
+ /*
+ * This is identical to the idle sleep, but we've just
+ * (during suspend) turned off all interrupt sources
+ * except for the ones we want, so now we can only wake
+ * up on something we actually want to wake up on. All
+ * timing has also been suspended.
+ */
+ um_idle_sleep();
+ return 0;
+}
+
+static void um_suspend_finish(void)
+{
+ um_irqs_resume();
+}
+
+const struct platform_suspend_ops um_suspend_ops = {
+ .valid = um_suspend_valid,
+ .prepare = um_suspend_prepare,
+ .enter = um_suspend_enter,
+ .finish = um_suspend_finish,
+};
+
+static int init_pm_wake_signal(void)
+{
+ /*
+ * In external time-travel mode we can't use signals to wake up
+ * since that would mess with the scheduling. We'll have to do
+ * some additional work to support wakeup on virtio devices or
+ * similar, perhaps implementing a fake RTC controller that can
+ * trigger wakeup (and request the appropriate scheduling from
+ * the external scheduler when going to suspend.)
+ */
+ if (time_travel_mode != TT_MODE_EXTERNAL)
+ register_pm_wake_signal();
+
+ suspend_set_ops(&um_suspend_ops);
+
+ return 0;
+}
+
+late_initcall(init_pm_wake_signal);
+#endif
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 839915b8c31c..77ac50baa3f8 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -10,6 +10,8 @@ obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \
registers.o sigio.o signal.o start_up.o time.o tty.o \
umid.o user_syms.o util.o drivers/ skas/
+CFLAGS_signal.o += -Wframe-larger-than=4096
+
obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index 9fa6e4187d4f..feb48d796e00 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -45,7 +45,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
unsigned long stack, sp;
int pid, fds[2], ret, n;
- stack = alloc_stack(0, __cant_sleep());
+ stack = alloc_stack(__cant_sleep());
if (stack == 0)
return -ENOMEM;
@@ -116,7 +116,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
unsigned long stack, sp;
int pid, status, err;
- stack = alloc_stack(0, __cant_sleep());
+ stack = alloc_stack(__cant_sleep());
if (stack == 0)
return -ENOMEM;
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index d508310ee5e1..98ea910ef87c 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -45,10 +45,10 @@ int os_epoll_triggered(int index, int events)
* access to the right includes/defines for EPOLL constants.
*/
-int os_event_mask(int irq_type)
+int os_event_mask(enum um_irq_type irq_type)
{
if (irq_type == IRQ_READ)
- return EPOLLIN | EPOLLPRI;
+ return EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP | EPOLLRDHUP;
if (irq_type == IRQ_WRITE)
return EPOLLOUT;
return 0;
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 75558080d0bf..6597ea1986ff 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -164,45 +164,55 @@ static void update_thread(void)
set_signals_trace(flags);
}
-int add_sigio_fd(int fd)
+int __add_sigio_fd(int fd)
{
struct pollfd *p;
- int err = 0, i, n;
+ int err, i, n;
- sigio_lock();
for (i = 0; i < all_sigio_fds.used; i++) {
if (all_sigio_fds.poll[i].fd == fd)
break;
}
if (i == all_sigio_fds.used)
- goto out;
+ return -ENOSPC;
p = &all_sigio_fds.poll[i];
for (i = 0; i < current_poll.used; i++) {
if (current_poll.poll[i].fd == fd)
- goto out;
+ return 0;
}
n = current_poll.used;
err = need_poll(&next_poll, n + 1);
if (err)
- goto out;
+ return err;
memcpy(next_poll.poll, current_poll.poll,
current_poll.used * sizeof(struct pollfd));
next_poll.poll[n] = *p;
next_poll.used = n + 1;
update_thread();
- out:
+
+ return 0;
+}
+
+
+int add_sigio_fd(int fd)
+{
+ int err;
+
+ sigio_lock();
+ err = __add_sigio_fd(fd);
sigio_unlock();
+
return err;
}
-int ignore_sigio_fd(int fd)
+int __ignore_sigio_fd(int fd)
{
struct pollfd *p;
- int err = 0, i, n = 0;
+ int err, i, n = 0;
/*
* This is called from exitcalls elsewhere in UML - if
@@ -212,17 +222,16 @@ int ignore_sigio_fd(int fd)
if (write_sigio_pid == -1)
return -EIO;
- sigio_lock();
for (i = 0; i < current_poll.used; i++) {
if (current_poll.poll[i].fd == fd)
break;
}
if (i == current_poll.used)
- goto out;
+ return -ENOENT;
err = need_poll(&next_poll, current_poll.used - 1);
if (err)
- goto out;
+ return err;
for (i = 0; i < current_poll.used; i++) {
p = &current_poll.poll[i];
@@ -232,8 +241,18 @@ int ignore_sigio_fd(int fd)
next_poll.used = current_poll.used - 1;
update_thread();
- out:
+
+ return 0;
+}
+
+int ignore_sigio_fd(int fd)
+{
+ int err;
+
+ sigio_lock();
+ err = __ignore_sigio_fd(fd);
sigio_unlock();
+
return err;
}
@@ -336,7 +355,7 @@ out_close1:
close(l_write_sigio_fds[1]);
}
-void sigio_broken(int fd, int read)
+void sigio_broken(int fd)
{
int err;
@@ -352,7 +371,7 @@ void sigio_broken(int fd, int read)
all_sigio_fds.poll[all_sigio_fds.used++] =
((struct pollfd) { .fd = fd,
- .events = read ? POLLIN : POLLOUT,
+ .events = POLLIN,
.revents = 0 });
out:
sigio_unlock();
@@ -360,17 +379,16 @@ out:
/* Changed during early boot */
static int pty_output_sigio;
-static int pty_close_sigio;
-void maybe_sigio_broken(int fd, int read)
+void maybe_sigio_broken(int fd)
{
if (!isatty(fd))
return;
- if ((read || pty_output_sigio) && (!read || pty_close_sigio))
+ if (pty_output_sigio)
return;
- sigio_broken(fd, read);
+ sigio_broken(fd);
}
static void sigio_cleanup(void)
@@ -514,19 +532,6 @@ static void tty_output(int master, int slave)
printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n);
}
-static void tty_close(int master, int slave)
-{
- printk(UM_KERN_INFO "Checking that host ptys support SIGIO on "
- "close...");
-
- close(slave);
- if (got_sigio) {
- printk(UM_KERN_CONT "Yes\n");
- pty_close_sigio = 1;
- } else
- printk(UM_KERN_CONT "No, enabling workaround\n");
-}
-
static void __init check_sigio(void)
{
if ((access("/dev/ptmx", R_OK) < 0) &&
@@ -536,7 +541,6 @@ static void __init check_sigio(void)
return;
}
check_one_sigio(tty_output);
- check_one_sigio(tty_close);
}
/* Here because it only does the SIGIO testing for now */
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index b58bc68cbe64..96f511d1aabe 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -136,6 +136,16 @@ void set_sigstack(void *sig_stack, int size)
panic("enabling signal stack failed, errno = %d\n", errno);
}
+static void sigusr1_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
+{
+ uml_pm_wake();
+}
+
+void register_pm_wake_signal(void)
+{
+ set_handler(SIGUSR1);
+}
+
static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
[SIGSEGV] = sig_handler,
[SIGBUS] = sig_handler,
@@ -145,7 +155,9 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
[SIGIO] = sig_handler,
[SIGWINCH] = sig_handler,
- [SIGALRM] = timer_alarm_handler
+ [SIGALRM] = timer_alarm_handler,
+
+ [SIGUSR1] = sigusr1_handler,
};
static void hard_handler(int sig, siginfo_t *si, void *p)
@@ -222,6 +234,11 @@ void set_handler(int sig)
panic("sigprocmask failed - errno = %d\n", errno);
}
+void send_sigio_to_self(void)
+{
+ kill(os_getpid(), SIGIO);
+}
+
int change_sig(int signal, int on)
{
sigset_t sigset;
@@ -254,6 +271,9 @@ void unblock_signals(void)
return;
signals_enabled = 1;
+#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+ deliver_time_travel_irqs();
+#endif
/*
* We loop because the IRQ handler returns with interrupts off. So,
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 4fb877b99dde..0621d521208e 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -400,7 +400,20 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
if (WIFSTOPPED(status)) {
int sig = WSTOPSIG(status);
- ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
+ /* These signal handlers need the si argument.
+ * The SIGIO and SIGALARM handlers which constitute the
+ * majority of invocations, do not use it.
+ */
+ switch (sig) {
+ case SIGSEGV:
+ case SIGTRAP:
+ case SIGILL:
+ case SIGBUS:
+ case SIGFPE:
+ case SIGWINCH:
+ ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
+ break;
+ }
switch (sig) {
case SIGSEGV:
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 90f6de224c70..a61cbf73a179 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -7,6 +7,7 @@
*/
#include <stddef.h>
+#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <time.h>
@@ -99,19 +100,9 @@ long long os_nsecs(void)
}
/**
- * os_idle_sleep() - sleep for a given time of nsecs
- * @nsecs: nanoseconds to sleep
+ * os_idle_sleep() - sleep until interrupted
*/
-void os_idle_sleep(unsigned long long nsecs)
+void os_idle_sleep(void)
{
- struct timespec ts = {
- .tv_sec = nsecs / UM_NSEC_PER_SEC,
- .tv_nsec = nsecs % UM_NSEC_PER_SEC
- };
-
- /*
- * Relay the signal if clock_nanosleep is interrupted.
- */
- if (clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL))
- deliver_alarm();
+ pause();
}
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 1d7558dac75f..a3dd61521d24 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -137,20 +137,13 @@ static inline int is_umdir_used(char *dir)
{
char pid[sizeof("nnnnnnnnn")], *end, *file;
int dead, fd, p, n, err;
- size_t filelen;
+ size_t filelen = strlen(dir) + sizeof("/pid") + 1;
- err = asprintf(&file, "%s/pid", dir);
- if (err < 0)
- return 0;
-
- filelen = strlen(file);
+ file = malloc(filelen);
+ if (!file)
+ return -ENOMEM;
- n = snprintf(file, filelen, "%s/pid", dir);
- if (n >= filelen) {
- printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n");
- err = -E2BIG;
- goto out;
- }
+ snprintf(file, filelen, "%s/pid", dir);
dead = 0;
fd = open(file, O_RDONLY);