From 0f4bd46ec252887f44f1f065b41867cac8f70dfb Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 22 Dec 2009 03:15:43 +0000 Subject: kmsg_dump: Dump on crash_kexec as well crash_kexec gets called before kmsg_dump(KMSG_DUMP_OOPS) if panic_on_oops is set, so the kernel log buffer is not stored for this case. This patch adds a KMSG_DUMP_KEXEC dump type which gets called when crash_kexec() is invoked. To avoid getting double dumps, the old KMSG_DUMP_PANIC is moved below crash_kexec(). The mtdoops driver is modified to handle KMSG_DUMP_KEXEC in the same way as a panic. Signed-off-by: KOSAKI Motohiro Acked-by: Simon Kagstrom Signed-off-by: David Woodhouse --- include/linux/kmsg_dump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index e32aa268efac..24b44145a886 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -17,6 +17,7 @@ enum kmsg_dump_reason { KMSG_DUMP_OOPS, KMSG_DUMP_PANIC, + KMSG_DUMP_KEXEC, }; /** -- cgit v1.2.3 From f6a8c60960bbea378142d1fa1b3d111555ee41c7 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Nov 2009 15:23:51 +0000 Subject: mtd: Really add ARM pismo support (Commit 7cb777a3d71f9d1f7eb149c7a504d21f24219ae8 (mtd: add ARM pismo support) intended to add this, but seems only to have patched the Makefile without touching Kconfig or providing any code...) The following patch adds support for PISMO modules found on ARM Ltd development platforms. These are MTD modules, and can have a selection of SRAM, flash or DOC devices as described by an on-board I2C EEPROM. We support SRAM and NOR flash devices only by registering appropriate conventional MTD platform devices as children of the 'pismo' device. 
Signed-off-by: Russell King Signed-off-by: David Woodhouse --- drivers/mtd/maps/Kconfig | 17 +++ drivers/mtd/maps/pismo.c | 320 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/pismo.h | 17 +++ 3 files changed, 354 insertions(+) create mode 100644 drivers/mtd/maps/pismo.c create mode 100644 include/linux/mtd/pismo.h (limited to 'include/linux') diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 4c364d44ad59..2de0cc823d60 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -549,4 +549,21 @@ config MTD_VMU To build this as a module select M here, the module will be called vmu-flash. +config MTD_PISMO + tristate "MTD discovery driver for PISMO modules" + depends on I2C + depends on ARCH_VERSATILE + help + This driver allows for discovery of PISMO modules - see + <http://www.pismoworld.org/>. These are small modules containing + up to five memory devices (eg, SRAM, flash, DOC) described by an + I2C EEPROM. + + This driver does not create any MTD maps itself; instead it + creates MTD physmap and MTD SRAM platform devices. If you + enable this option, you should consider enabling MTD_PHYSMAP + and/or MTD_PLATRAM according to the devices on your module. + + When built as a module, it will be called pismo.ko + endmenu diff --git a/drivers/mtd/maps/pismo.c b/drivers/mtd/maps/pismo.c new file mode 100644 index 000000000000..c48cad271f5d --- /dev/null +++ b/drivers/mtd/maps/pismo.c @@ -0,0 +1,320 @@ +/* + * PISMO memory driver - http://www.pismoworld.org/ + * + * For ARM Realview and Versatile platforms + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PISMO_NUM_CS 5 + +struct pismo_cs_block { + u8 type; + u8 width; + __le16 access; + __le32 size; + u32 reserved[2]; + char device[32]; +} __packed; + +struct pismo_eeprom { + struct pismo_cs_block cs[PISMO_NUM_CS]; + char board[15]; + u8 sum; +} __packed; + +struct pismo_mem { + phys_addr_t base; + u32 size; + u16 access; + u8 width; + u8 type; +}; + +struct pismo_data { + struct i2c_client *client; + void (*vpp)(void *, int); + void *vpp_data; + struct platform_device *dev[PISMO_NUM_CS]; +}; + +/* FIXME: set_vpp could do with a better calling convention */ +static struct pismo_data *vpp_pismo; +static DEFINE_MUTEX(pismo_mutex); + +static int pismo_setvpp_probe_fix(struct pismo_data *pismo) +{ + mutex_lock(&pismo_mutex); + if (vpp_pismo) { + mutex_unlock(&pismo_mutex); + kfree(pismo); + return -EBUSY; + } + vpp_pismo = pismo; + mutex_unlock(&pismo_mutex); + return 0; +} + +static void pismo_setvpp_remove_fix(struct pismo_data *pismo) +{ + mutex_lock(&pismo_mutex); + if (vpp_pismo == pismo) + vpp_pismo = NULL; + mutex_unlock(&pismo_mutex); +} + +static void pismo_set_vpp(struct map_info *map, int on) +{ + struct pismo_data *pismo = vpp_pismo; + + pismo->vpp(pismo->vpp_data, on); +} +/* end of hack */ + + +static unsigned int __devinit pismo_width_to_bytes(unsigned int width) +{ + width &= 15; + if (width > 2) + return 0; + return 1 << width; +} + +static int __devinit pismo_eeprom_read(struct i2c_client *client, void *buf, + u8 addr, size_t size) +{ + int ret; + struct i2c_msg msg[] = { + { + .addr = client->addr, + .len = sizeof(addr), + .buf = &addr, + }, { + .addr = client->addr, + .flags = I2C_M_RD, + .len = size, + .buf = buf, + }, + }; + + ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg)); + + return ret == ARRAY_SIZE(msg) ? 
size : -EIO; +} + +static int __devinit pismo_add_device(struct pismo_data *pismo, int i, + struct pismo_mem *region, const char *name, void *pdata, size_t psize) +{ + struct platform_device *dev; + struct resource res = { }; + phys_addr_t base = region->base; + int ret; + + if (base == ~0) + return -ENXIO; + + res.start = base; + res.end = base + region->size - 1; + res.flags = IORESOURCE_MEM; + + dev = platform_device_alloc(name, i); + if (!dev) + return -ENOMEM; + dev->dev.parent = &pismo->client->dev; + + do { + ret = platform_device_add_resources(dev, &res, 1); + if (ret) + break; + + ret = platform_device_add_data(dev, pdata, psize); + if (ret) + break; + + ret = platform_device_add(dev); + if (ret) + break; + + pismo->dev[i] = dev; + return 0; + } while (0); + + platform_device_put(dev); + return ret; +} + +static int __devinit pismo_add_nor(struct pismo_data *pismo, int i, + struct pismo_mem *region) +{ + struct physmap_flash_data data = { + .width = region->width, + }; + + if (pismo->vpp) + data.set_vpp = pismo_set_vpp; + + return pismo_add_device(pismo, i, region, "physmap-flash", + &data, sizeof(data)); +} + +static int __devinit pismo_add_sram(struct pismo_data *pismo, int i, + struct pismo_mem *region) +{ + struct platdata_mtd_ram data = { + .bankwidth = region->width, + }; + + return pismo_add_device(pismo, i, region, "mtd-ram", + &data, sizeof(data)); +} + +static void __devinit pismo_add_one(struct pismo_data *pismo, int i, + const struct pismo_cs_block *cs, phys_addr_t base) +{ + struct device *dev = &pismo->client->dev; + struct pismo_mem region; + + region.base = base; + region.type = cs->type; + region.width = pismo_width_to_bytes(cs->width); + region.access = le16_to_cpu(cs->access); + region.size = le32_to_cpu(cs->size); + + if (region.width == 0) { + dev_err(dev, "cs%u: bad width: %02x, ignoring\n", i, cs->width); + return; + } + + /* + * FIXME: may need to configure the platform's memory controller here, but at + * the moment we assume that it has 
already been correctly setup. + * The memory controller can also tell us the base address as well. + */ + + dev_info(dev, "cs%u: %.32s: type %02x access %u00ps size %uK\n", + i, cs->device, region.type, region.access, region.size / 1024); + + switch (region.type) { + case 0: + break; + case 1: + /* static DOC */ + break; + case 2: + /* static NOR */ + pismo_add_nor(pismo, i, &region); + break; + case 3: + /* static RAM */ + pismo_add_sram(pismo, i, &region); + break; + } +} + +static int __devexit pismo_remove(struct i2c_client *client) +{ + struct pismo_data *pismo = i2c_get_clientdata(client); + int i; + + for (i = 0; i < ARRAY_SIZE(pismo->dev); i++) + platform_device_unregister(pismo->dev[i]); + + /* FIXME: set_vpp needs saner arguments */ + pismo_setvpp_remove_fix(pismo); + + kfree(pismo); + + return 0; +} + +static int __devinit pismo_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct pismo_pdata *pdata = client->dev.platform_data; + struct pismo_eeprom eeprom; + struct pismo_data *pismo; + int ret, i; + + if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) { + dev_err(&client->dev, "functionality mismatch\n"); + return -EIO; + } + + pismo = kzalloc(sizeof(*pismo), GFP_KERNEL); + if (!pismo) + return -ENOMEM; + + /* FIXME: set_vpp needs saner arguments */ + ret = pismo_setvpp_probe_fix(pismo); + if (ret) + return ret; + + pismo->client = client; + if (pdata) { + pismo->vpp = pdata->set_vpp; + pismo->vpp_data = pdata->vpp_data; + } + i2c_set_clientdata(client, pismo); + + ret = pismo_eeprom_read(client, &eeprom, 0, sizeof(eeprom)); + if (ret < 0) { + dev_err(&client->dev, "error reading EEPROM: %d\n", ret); + return ret; + } + + dev_info(&client->dev, "%.15s board found\n", eeprom.board); + + for (i = 0; i < ARRAY_SIZE(eeprom.cs); i++) + if (eeprom.cs[i].type != 0xff) + pismo_add_one(pismo, i, &eeprom.cs[i], + pdata->cs_addrs[i]); + + return 0; +} + +static const struct 
i2c_device_id pismo_id[] = { + { "pismo" }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, pismo_id); + +static struct i2c_driver pismo_driver = { + .driver = { + .name = "pismo", + .owner = THIS_MODULE, + }, + .probe = pismo_probe, + .remove = __devexit_p(pismo_remove), + .id_table = pismo_id, +}; + +static int __init pismo_init(void) +{ + BUILD_BUG_ON(sizeof(struct pismo_cs_block) != 48); + BUILD_BUG_ON(sizeof(struct pismo_eeprom) != 256); + + return i2c_add_driver(&pismo_driver); +} +module_init(pismo_init); + +static void __exit pismo_exit(void) +{ + i2c_del_driver(&pismo_driver); +} +module_exit(pismo_exit); + +MODULE_AUTHOR("Russell King "); +MODULE_DESCRIPTION("PISMO memory driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mtd/pismo.h b/include/linux/mtd/pismo.h new file mode 100644 index 000000000000..8dfb7e1421c5 --- /dev/null +++ b/include/linux/mtd/pismo.h @@ -0,0 +1,17 @@ +/* + * PISMO memory driver - http://www.pismoworld.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + */ +#ifndef __LINUX_MTD_PISMO_H +#define __LINUX_MTD_PISMO_H + +struct pismo_pdata { + void (*set_vpp)(void *, int); + void *vpp_data; + phys_addr_t cs_addrs[5]; +}; + +#endif -- cgit v1.2.3 From ec51b7f538c440bfa5a4d538133c659071c02155 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 19 Jan 2010 00:27:58 -0800 Subject: Input: ad7879 - support auxiliary GPIOs via gpiolib Drop the simple fancy sysfs hooks for the aux GPIOs and expose these via the gpiolib interface so that other drivers can use them. 
Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ad7879.c | 197 ++++++++++++++++++++++++++----------- include/linux/spi/ad7879.h | 12 ++- 2 files changed, 149 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index c21e6d3a8844..794d070c6900 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -47,6 +47,7 @@ #include #include #include +#include #include @@ -132,7 +133,9 @@ struct ad7879 { struct input_dev *input; struct work_struct work; struct timer_list timer; - +#ifdef CONFIG_GPIOLIB + struct gpio_chip gc; +#endif struct mutex mutex; unsigned disabled:1; /* P: mutex */ @@ -150,11 +153,9 @@ struct ad7879 { u8 median; u16 x_plate_ohms; u16 pressure_max; - u16 gpio_init; u16 cmd_crtl1; u16 cmd_crtl2; u16 cmd_crtl3; - unsigned gpio:1; }; static int ad7879_read(bus_device *, u8); @@ -237,24 +238,6 @@ static irqreturn_t ad7879_irq(int irq, void *handle) static void ad7879_setup(struct ad7879 *ts) { - ts->cmd_crtl3 = AD7879_YPLUS_BIT | - AD7879_XPLUS_BIT | - AD7879_Z2_BIT | - AD7879_Z1_BIT | - AD7879_TEMPMASK_BIT | - AD7879_AUXVBATMASK_BIT | - AD7879_GPIOALERTMASK_BIT; - - ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR | - AD7879_AVG(ts->averaging) | - AD7879_MFS(ts->median) | - AD7879_FCD(ts->first_conversion_delay) | - ts->gpio_init; - - ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 | - AD7879_ACQ(ts->acquisition_time) | - AD7879_TMR(ts->pen_down_acc_interval); - ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2); ad7879_write(ts->bus, AD7879_REG_CTRL3, ts->cmd_crtl3); ad7879_write(ts->bus, AD7879_REG_CTRL1, ts->cmd_crtl1); @@ -324,48 +307,132 @@ static ssize_t ad7879_disable_store(struct device *dev, static DEVICE_ATTR(disable, 0664, ad7879_disable_show, ad7879_disable_store); -static ssize_t ad7879_gpio_show(struct device *dev, - struct 
device_attribute *attr, char *buf) +static struct attribute *ad7879_attributes[] = { + &dev_attr_disable.attr, + NULL +}; + +static const struct attribute_group ad7879_attr_group = { + .attrs = ad7879_attributes, +}; + +#ifdef CONFIG_GPIOLIB +static int ad7879_gpio_direction_input(struct gpio_chip *chip, + unsigned gpio) { - struct ad7879 *ts = dev_get_drvdata(dev); + struct ad7879 *ts = container_of(chip, struct ad7879, gc); + int err; - return sprintf(buf, "%u\n", ts->gpio); + mutex_lock(&ts->mutex); + ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIODIR | AD7879_GPIOPOL; + err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2); + mutex_unlock(&ts->mutex); + + return err; } -static ssize_t ad7879_gpio_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static int ad7879_gpio_direction_output(struct gpio_chip *chip, + unsigned gpio, int level) { - struct ad7879 *ts = dev_get_drvdata(dev); - unsigned long val; - int error; + struct ad7879 *ts = container_of(chip, struct ad7879, gc); + int err; - error = strict_strtoul(buf, 10, &val); - if (error) - return error; + mutex_lock(&ts->mutex); + ts->cmd_crtl2 &= ~AD7879_GPIODIR; + ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIOPOL; + if (level) + ts->cmd_crtl2 |= AD7879_GPIO_DATA; + else + ts->cmd_crtl2 &= ~AD7879_GPIO_DATA; + + err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2); + mutex_unlock(&ts->mutex); + + return err; +} + +static int ad7879_gpio_get_value(struct gpio_chip *chip, unsigned gpio) +{ + struct ad7879 *ts = container_of(chip, struct ad7879, gc); + u16 val; mutex_lock(&ts->mutex); - ts->gpio = !!val; - error = ad7879_write(ts->bus, AD7879_REG_CTRL2, - ts->gpio ? - ts->cmd_crtl2 & ~AD7879_GPIO_DATA : - ts->cmd_crtl2 | AD7879_GPIO_DATA); + val = ad7879_read(ts->bus, AD7879_REG_CTRL2); mutex_unlock(&ts->mutex); - return error ? 
: count; + return !!(val & AD7879_GPIO_DATA); } -static DEVICE_ATTR(gpio, 0664, ad7879_gpio_show, ad7879_gpio_store); +static void ad7879_gpio_set_value(struct gpio_chip *chip, + unsigned gpio, int value) +{ + struct ad7879 *ts = container_of(chip, struct ad7879, gc); -static struct attribute *ad7879_attributes[] = { - &dev_attr_disable.attr, - &dev_attr_gpio.attr, - NULL -}; + mutex_lock(&ts->mutex); + if (value) + ts->cmd_crtl2 |= AD7879_GPIO_DATA; + else + ts->cmd_crtl2 &= ~AD7879_GPIO_DATA; -static const struct attribute_group ad7879_attr_group = { - .attrs = ad7879_attributes, -}; + ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2); + mutex_unlock(&ts->mutex); +} + +static int __devinit ad7879_gpio_add(struct device *dev) +{ + struct ad7879 *ts = dev_get_drvdata(dev); + struct ad7879_platform_data *pdata = dev->platform_data; + int ret = 0; + + if (pdata->gpio_export) { + ts->gc.direction_input = ad7879_gpio_direction_input; + ts->gc.direction_output = ad7879_gpio_direction_output; + ts->gc.get = ad7879_gpio_get_value; + ts->gc.set = ad7879_gpio_set_value; + ts->gc.can_sleep = 1; + ts->gc.base = pdata->gpio_base; + ts->gc.ngpio = 1; + ts->gc.label = "AD7879-GPIO"; + ts->gc.owner = THIS_MODULE; + ts->gc.dev = dev; + + ret = gpiochip_add(&ts->gc); + if (ret) + dev_err(dev, "failed to register gpio %d\n", + ts->gc.base); + } + + return ret; +} + +/* + * We mark ad7879_gpio_remove inline so there is a chance the code + * gets discarded when not needed. We can't do __devinit/__devexit + * markup since it is used in both probe and remove methods. 
+ */ +static inline void ad7879_gpio_remove(struct device *dev) +{ + struct ad7879 *ts = dev_get_drvdata(dev); + struct ad7879_platform_data *pdata = dev->platform_data; + int ret; + + if (pdata->gpio_export) { + ret = gpiochip_remove(&ts->gc); + if (ret) + dev_err(dev, "failed to remove gpio %d\n", + ts->gc.base); + } +} +#else +static inline int ad7879_gpio_add(struct device *dev) +{ + return 0; +} + +static inline void ad7879_gpio_remove(struct device *dev) +{ +} +#endif static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts) { @@ -403,12 +470,6 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts) ts->pen_down_acc_interval = pdata->pen_down_acc_interval; ts->median = pdata->median; - if (pdata->gpio_output) - ts->gpio_init = AD7879_GPIO_EN | - (pdata->gpio_default ? 0 : AD7879_GPIO_DATA); - else - ts->gpio_init = AD7879_GPIO_EN | AD7879_GPIODIR; - snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&bus->dev)); input_dev->name = "AD7879 Touchscreen"; @@ -446,6 +507,23 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts) goto err_free_mem; } + ts->cmd_crtl3 = AD7879_YPLUS_BIT | + AD7879_XPLUS_BIT | + AD7879_Z2_BIT | + AD7879_Z1_BIT | + AD7879_TEMPMASK_BIT | + AD7879_AUXVBATMASK_BIT | + AD7879_GPIOALERTMASK_BIT; + + ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR | + AD7879_AVG(ts->averaging) | + AD7879_MFS(ts->median) | + AD7879_FCD(ts->first_conversion_delay); + + ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 | + AD7879_ACQ(ts->acquisition_time) | + AD7879_TMR(ts->pen_down_acc_interval); + ad7879_setup(ts); err = request_irq(bus->irq, ad7879_irq, @@ -460,15 +538,21 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts) if (err) goto err_free_irq; - err = input_register_device(input_dev); + err = ad7879_gpio_add(&bus->dev); if (err) goto err_remove_attr; + err = input_register_device(input_dev); + if (err) + goto err_remove_gpio; + dev_info(&bus->dev, "Rev.%d 
touchscreen, irq %d\n", revid >> 8, bus->irq); return 0; +err_remove_gpio: + ad7879_gpio_remove(&bus->dev); err_remove_attr: sysfs_remove_group(&bus->dev.kobj, &ad7879_attr_group); err_free_irq: @@ -481,6 +565,7 @@ err_free_mem: static int __devexit ad7879_destroy(bus_device *bus, struct ad7879 *ts) { + ad7879_gpio_remove(&bus->dev); ad7879_disable(ts); sysfs_remove_group(&ts->bus->dev.kobj, &ad7879_attr_group); free_irq(ts->bus->irq, ts); diff --git a/include/linux/spi/ad7879.h b/include/linux/spi/ad7879.h index 4231104c9afa..6334cee1a3be 100644 --- a/include/linux/spi/ad7879.h +++ b/include/linux/spi/ad7879.h @@ -28,8 +28,12 @@ struct ad7879_platform_data { * 1 = 4, 2 = 8, 3 = 16 (median > averaging) */ u8 median; - /* 1 = AUX/VBAT/GPIO set to GPIO Output */ - u8 gpio_output; - /* Initial GPIO pin state (valid if gpio_output = 1) */ - u8 gpio_default; + /* 1 = AUX/VBAT/GPIO export GPIO to gpiolib + * requires CONFIG_GPIOLIB + */ + bool gpio_export; + /* identifies the first GPIO number handled by this chip; + * or, if negative, requests dynamic ID allocation. + */ + s32 gpio_base; }; -- cgit v1.2.3 From cb289d6244a37cf932c571d6deb0daa8030f931b Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Wed, 13 Jan 2010 09:34:36 -0800 Subject: eventfd - allow atomic read and waitqueue remove KVM needs a way to atomically remove itself from the eventfd ->poll() wait queue head, in order to correctly handle its IRQfd deassign operation. This patch introduces such an API, plus a way to read an eventfd from its context. 
Signed-off-by: Davide Libenzi Signed-off-by: Avi Kivity --- fs/eventfd.c | 89 ++++++++++++++++++++++++++++++++++++++++--------- include/linux/eventfd.h | 16 +++++++++ 2 files changed, 90 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/eventfd.c b/fs/eventfd.c index d26402ff06ea..7758cc382ef0 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait) return events; } -static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +{ + *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; + ctx->count -= *cnt; +} + +/** + * eventfd_ctx_remove_wait_queue - Read the current counter and remove the wait queue entry. + * @ctx: [in] Pointer to eventfd context. + * @wait: [in] Wait queue to be removed. + * @cnt: [out] Pointer to the 64bit counter value. + * + * Returns zero if successful, or the following error codes: + * + * -EAGAIN : The operation would have blocked. + * + * This is used to atomically remove a wait queue entry from the eventfd wait + * queue head, and read/reset the counter value. + */ +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, + __u64 *cnt) +{ + unsigned long flags; + + spin_lock_irqsave(&ctx->wqh.lock, flags); + eventfd_ctx_do_read(ctx, cnt); + __remove_wait_queue(&ctx->wqh, wait); + if (*cnt != 0 && waitqueue_active(&ctx->wqh)) + wake_up_locked_poll(&ctx->wqh, POLLOUT); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return *cnt != 0 ? 0 : -EAGAIN; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); + +/** + * eventfd_ctx_read - Reads the eventfd counter or waits if it is zero. + * @ctx: [in] Pointer to eventfd context. + * @no_wait: [in] Different from zero if the operation should not block. + * @cnt: [out] Pointer to the 64bit counter value. 
+ * + * Returns zero if successful, or the following error codes: + * + * -EAGAIN : The operation would have blocked but @no_wait was nonzero. + * -ERESTARTSYS : A signal interrupted the wait operation. + * + * If @no_wait is zero, the function might sleep until the eventfd internal + * counter becomes greater than zero. + */ +ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt) { - struct eventfd_ctx *ctx = file->private_data; ssize_t res; - __u64 ucnt = 0; DECLARE_WAITQUEUE(wait, current); - if (count < sizeof(ucnt)) - return -EINVAL; spin_lock_irq(&ctx->wqh.lock); + *cnt = 0; res = -EAGAIN; if (ctx->count > 0) - res = sizeof(ucnt); - else if (!(file->f_flags & O_NONBLOCK)) { + res = 0; + else if (!no_wait) { __add_wait_queue(&ctx->wqh, &wait); - for (res = 0;;) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (ctx->count > 0) { - res = sizeof(ucnt); + res = 0; break; } if (signal_pending(current)) { @@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (likely(res > 0)) { - ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; - ctx->count -= ucnt; + if (likely(res == 0)) { + eventfd_ctx_do_read(ctx, cnt); if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, POLLOUT); } spin_unlock_irq(&ctx->wqh.lock); - if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) - return -EFAULT; return res; } +EXPORT_SYMBOL_GPL(eventfd_ctx_read); + +static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct eventfd_ctx *ctx = file->private_data; + ssize_t res; + __u64 cnt; + + if (count < sizeof(cnt)) + return -EINVAL; + res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt); + if (res < 0) + return res; + + return put_user(cnt, (__u64 __user *) buf) ? 
-EFAULT : sizeof(cnt); +} static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 94dd10366a78..91bb4f27238c 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -10,6 +10,7 @@ #include #include +#include /* * CAREFUL: Check include/asm-generic/fcntl.h when defining @@ -34,6 +35,9 @@ struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); int eventfd_signal(struct eventfd_ctx *ctx, int n); +ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt); +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, + __u64 *cnt); #else /* CONFIG_EVENTFD */ @@ -61,6 +65,18 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) } +static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, + __u64 *cnt) +{ + return -ENOSYS; +} + +static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, + wait_queue_t *wait, __u64 *cnt) +{ + return -ENOSYS; +} + #endif #endif /* _LINUX_EVENTFD_H */ -- cgit v1.2.3 From 6d3faf6f431bafb25f4b9926c50a7e5c267738c6 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 24 Jan 2010 14:48:00 +0100 Subject: firewire: cdev: add_descriptor documentation fix struct fw_cdev_add_descriptor.length is in quadlets, not in bytes. Also remove any doubts about the endianness of descriptor data. 
Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 1f716d9f714b..520ecf86cbb3 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -380,7 +380,7 @@ struct fw_cdev_initiate_bus_reset { * @immediate: If non-zero, immediate key to insert before pointer * @key: Upper 8 bits of root directory pointer * @data: Userspace pointer to contents of descriptor block - * @length: Length of descriptor block data, in bytes + * @length: Length of descriptor block data, in quadlets * @handle: Handle to the descriptor, written by the kernel * * Add a descriptor block and optionally a preceding immediate key to the local @@ -394,6 +394,8 @@ struct fw_cdev_initiate_bus_reset { * If not 0, the @immediate field specifies an immediate key which will be * inserted before the root directory pointer. * + * @immediate, @key, and @data array elements are CPU-endian quadlets. + * * If successful, the kernel adds the descriptor and writes back a handle to the * kernel-side object to be used for later removal of the descriptor block and * immediate key. -- cgit v1.2.3 From 0531b2aac59c2296570ac52bfc032ef2ace7d5e1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 27 Jan 2010 09:20:03 -0800 Subject: mm: add new 'read_cache_page_gfp()' helper function It's a simplified 'read_cache_page()' which takes a page allocation flag, so that different paths can control how aggressive the memory allocations are that populate an address space. In particular, the intel GPU object mapping code wants to be able to do a certain amount of its own internal memory management by automatically shrinking the address space when memory starts getting tight. This allows it to dynamically use different memory allocation policies on a per-allocation basis, rather than depend on the (static) address space gfp policy. 
The actual new function is a one-liner, but re-organizing the helper functions to the point where you can do this with a single line of code is what most of the patch is all about. Tested-by: Chris Wilson Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 + mm/filemap.c | 100 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 70 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ed5d7501e181..3c62ed408492 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -253,6 +253,8 @@ extern struct page * read_cache_page_async(struct address_space *mapping, extern struct page * read_cache_page(struct address_space *mapping, pgoff_t index, filler_t *filler, void *data); +extern struct page * read_cache_page_gfp(struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); extern int read_cache_pages(struct address_space *mapping, struct list_head *pages, filler_t *filler, void *data); diff --git a/mm/filemap.c b/mm/filemap.c index 96ac6b0eb6cb..e3736923220e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap); static struct page *__read_cache_page(struct address_space *mapping, pgoff_t index, int (*filler)(void *,struct page*), - void *data) + void *data, + gfp_t gfp) { struct page *page; int err; repeat: page = find_get_page(mapping, index); if (!page) { - page = page_cache_alloc_cold(mapping); + page = __page_cache_alloc(gfp | __GFP_COLD); if (!page) return ERR_PTR(-ENOMEM); err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); @@ -1661,31 +1662,18 @@ repeat: return page; } -/** - * read_cache_page_async - read into page cache, fill it if needed - * @mapping: the page's address_space - * @index: the page index - * @filler: function to perform the read - * @data: destination for read data - * - * Same as read_cache_page, but don't wait for page to become unlocked - * after submitting it 
to the filler. - * - * Read into the page cache. If a page already exists, and PageUptodate() is - * not set, try to fill the page but don't wait for it to become unlocked. - * - * If the page does not get brought uptodate, return -EIO. - */ -struct page *read_cache_page_async(struct address_space *mapping, +static struct page *do_read_cache_page(struct address_space *mapping, pgoff_t index, int (*filler)(void *,struct page*), - void *data) + void *data, + gfp_t gfp) + { struct page *page; int err; retry: - page = __read_cache_page(mapping, index, filler, data); + page = __read_cache_page(mapping, index, filler, data, gfp); if (IS_ERR(page)) return page; if (PageUptodate(page)) @@ -1710,8 +1698,67 @@ out: mark_page_accessed(page); return page; } + +/** + * read_cache_page_async - read into page cache, fill it if needed + * @mapping: the page's address_space + * @index: the page index + * @filler: function to perform the read + * @data: destination for read data + * + * Same as read_cache_page, but don't wait for page to become unlocked + * after submitting it to the filler. + * + * Read into the page cache. If a page already exists, and PageUptodate() is + * not set, try to fill the page but don't wait for it to become unlocked. + * + * If the page does not get brought uptodate, return -EIO. + */ +struct page *read_cache_page_async(struct address_space *mapping, + pgoff_t index, + int (*filler)(void *,struct page*), + void *data) +{ + return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); +} EXPORT_SYMBOL(read_cache_page_async); +static struct page *wait_on_page_read(struct page *page) +{ + if (!IS_ERR(page)) { + wait_on_page_locked(page); + if (!PageUptodate(page)) { + page_cache_release(page); + page = ERR_PTR(-EIO); + } + } + return page; +} + +/** + * read_cache_page_gfp - read into page cache, using specified page allocation flags. 
+ * @mapping: the page's address_space + * @index: the page index + * @gfp: the page allocator flags to use if allocating + * + * This is the same as "read_mapping_page(mapping, index, NULL)", but with + * any new page allocations done using the specified allocation flags. Note + * that the Radix tree operations will still use GFP_KERNEL, so you can't + * expect to do this atomically or anything like that - but you can pass in + * other page requirements. + * + * If the page does not get brought uptodate, return -EIO. + */ +struct page *read_cache_page_gfp(struct address_space *mapping, + pgoff_t index, + gfp_t gfp) +{ + filler_t *filler = (filler_t *)mapping->a_ops->readpage; + + return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp)); +} +EXPORT_SYMBOL(read_cache_page_gfp); + /** * read_cache_page - read into page cache, fill it if needed * @mapping: the page's address_space @@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping, int (*filler)(void *,struct page*), void *data) { - struct page *page; - - page = read_cache_page_async(mapping, index, filler, data); - if (IS_ERR(page)) - goto out; - wait_on_page_locked(page); - if (!PageUptodate(page)) { - page_cache_release(page); - page = ERR_PTR(-EIO); - } - out: - return page; + return wait_on_page_read(read_cache_page_async(mapping, index, filler, data)); } EXPORT_SYMBOL(read_cache_page); -- cgit v1.2.3 From bb209c8287d2d55ec4a67e3933346e0a3ee0da76 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 26 Jan 2010 17:10:03 +0000 Subject: powerpc/pci: Add calls to set_pcie_port_type() and set_pcie_hotplug_bridge() We are missing these when building the pci_dev from scratch off the Open Firmware device-tree Signed-off-by: Benjamin Herrenschmidt Acked-by: Jesse Barnes --- arch/powerpc/kernel/pci_of_scan.c | 2 ++ drivers/pci/probe.c | 4 ++-- include/linux/pci.h | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff 
--git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 7311fdfb9bf8..693eb9a25bfa 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -140,6 +140,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->devfn = devfn; dev->multifunction = 0; /* maybe a lie? */ dev->needs_freset = 0; /* pcie fundamental reset required */ + set_pcie_port_type(dev); dev->vendor = get_int_prop(node, "vendor-id", 0xffff); dev->device = get_int_prop(node, "device-id", 0xffff); @@ -164,6 +165,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, /* a PCI-PCI bridge */ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; dev->rom_base_reg = PCI_ROM_ADDRESS1; + set_pcie_hotplug_bridge(dev); } else if (!strcmp(type, "cardbus")) { dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; } else { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 98ffb2de22e9..446e4a94d7d3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -681,7 +681,7 @@ static void pci_read_irq(struct pci_dev *dev) dev->irq = irq; } -static void set_pcie_port_type(struct pci_dev *pdev) +void set_pcie_port_type(struct pci_dev *pdev) { int pos; u16 reg16; @@ -695,7 +695,7 @@ static void set_pcie_port_type(struct pci_dev *pdev) pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; } -static void set_pcie_hotplug_bridge(struct pci_dev *pdev) +void set_pcie_hotplug_bridge(struct pci_dev *pdev) { int pos; u16 reg16; diff --git a/include/linux/pci.h b/include/linux/pci.h index 174e5392e51e..c1968f464c38 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -756,6 +756,10 @@ pci_power_t pci_target_state(struct pci_dev *dev); int pci_prepare_to_sleep(struct pci_dev *dev); int pci_back_from_sleep(struct pci_dev *dev); +/* For use by arch with custom probe code */ +void set_pcie_port_type(struct pci_dev *pdev); +void set_pcie_hotplug_bridge(struct pci_dev *pdev); + /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct 
pci_bus *bus, unsigned int devfn, int cap); #ifdef CONFIG_HOTPLUG -- cgit v1.2.3 From cb6ecf6f7afece066265e243657b0ac28150a7b2 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 28 Jan 2010 22:28:27 -0800 Subject: Input: add the ABS_MT_PRESSURE event For pressure-based multi-touch devices, a direct way to send sensor intensity data per finger is needed. This patch adds the ABS_MT_PRESSURE event to the MT protocol. Requested-by: Yoonyoung Shim Requested-by: Mika Kuoppala Requested-by: Peter Hutterer Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 1 + include/linux/input.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index 30b503b8d67b..86cb2d2196ff 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -46,6 +46,7 @@ static unsigned int input_abs_bypass_init_data[] __initdata = { ABS_MT_TOOL_TYPE, ABS_MT_BLOB_ID, ABS_MT_TRACKING_ID, + ABS_MT_PRESSURE, 0 }; static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)]; diff --git a/include/linux/input.h b/include/linux/input.h index 7be8a6537b57..735ceaf1bc2d 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -660,6 +660,7 @@ struct input_absinfo { #define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ #define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ #define ABS_MT_TRACKING_ID 0x39 /* Unique ID of initiated contact */ +#define ABS_MT_PRESSURE 0x3a /* Pressure on contact area */ #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) -- cgit v1.2.3 From 221af7f87b97431e3ee21ce4b0e77d5411cf1549 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jan 2010 22:14:42 -0800 Subject: Split 'flush_old_exec' into two functions 'flush_old_exec()' is the point of no return when doing an execve(), and it is pretty badly misnamed. It doesn't just flush the old executable environment, it also starts up the new one. 
Which is very inconvenient for things like setting up the new personality, because we want the new personality to affect the starting of the new environment, but at the same time we do _not_ want the new personality to take effect if flushing the old one fails. As a result, the x86-64 '32-bit' personality is actually done using this insane "I'm going to change the ABI, but I haven't done it yet" bit (TIF_ABI_PENDING), with SET_PERSONALITY() not actually setting the personality, but just the "pending" bit, so that "flush_thread()" can do the actual personality magic. This patch in no way changes any of that insanity, but it does split the 'flush_old_exec()' function up into a preparatory part that can fail (still called flush_old_exec()), and a new part that will actually set up the new exec environment (setup_new_exec()). All callers are changed to trivially comply with the new world order. Signed-off-by: H. Peter Anvin Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/sh/kernel/process_64.c | 2 +- arch/x86/ia32/ia32_aout.c | 10 ++++++---- fs/binfmt_aout.c | 1 + fs/binfmt_elf.c | 27 ++------------------------- fs/binfmt_elf_fdpic.c | 3 +++ fs/binfmt_flat.c | 1 + fs/binfmt_som.c | 1 + fs/exec.c | 26 ++++++++++++++++---------- include/linux/binfmts.h | 1 + include/linux/sched.h | 2 +- 10 files changed, 33 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 31f80c61b031..ec79faf6f021 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -368,7 +368,7 @@ void exit_thread(void) void flush_thread(void) { - /* Called by fs/exec.c (flush_old_exec) to remove traces of a + /* Called by fs/exec.c (setup_new_exec) to remove traces of a * previously running executable. 
*/ #ifdef CONFIG_SH_FPU if (last_task_used_math == current) { diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 2a4d073d2cf1..435d2a5323da 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -308,15 +308,17 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval) return retval; - regs->cs = __USER32_CS; - regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = - regs->r13 = regs->r14 = regs->r15 = 0; - /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); clear_thread_flag(TIF_ABI_PENDING); + setup_new_exec(bprm); + + regs->cs = __USER32_CS; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = + regs->r13 = regs->r14 = regs->r15 = 0; + current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); current->mm->end_data = ex.a_data + diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 346b69405363..fdd397099172 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) #else set_personality(PER_LINUX); #endif + setup_new_exec(bprm); current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index edd90c49003c..fd5b2ea5d299 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; - /* - * The early SET_PERSONALITY here is so that the lookup - * for the interpreter happens in the namespace of the - * to-be-execed image. SET_PERSONALITY can select an - * alternate root. - * - * However, SET_PERSONALITY is NOT allowed to switch - * this task into the new images's memory mapping - * policy - that is, TASK_SIZE must still evaluate to - * that which is appropriate to the execing application. 
- * This is because exit_mmap() needs to have TASK_SIZE - * evaluate to the size of the old image. - * - * So if (say) a 64-bit application is execing a 32-bit - * application it is the architecture's responsibility - * to defer changing the value of TASK_SIZE until the - * switch really is going to happen - do this in - * flush_thread(). - akpm - */ - SET_PERSONALITY(loc->elf_ex); - interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) @@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Verify the interpreter has a valid arch */ if (!elf_check_arch(&loc->interp_elf_ex)) goto out_free_dentry; - } else { - /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex); } /* Flush all traces of the currently running executable */ @@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; - arch_pick_mmap_layout(current->mm); + + setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. 
We will change some of these later */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c57d9ce5ff7e..18d77297ccc8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, set_personality(PER_LINUX_FDPIC); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; + + setup_new_exec(bprm); + set_binfmt(&elf_fdpic_format); current->mm->start_code = 0; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index d4a00ea1054c..42c6b4a54445 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* OK, This is the point of no return */ set_personality(PER_LINUX_32BIT); + setup_new_exec(bprm); } /* diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 2a9b5330cc5e..cc8560f6c9b0 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* OK, This is the point of no return */ current->flags &= ~PF_FORKNOEXEC; current->personality = PER_HPUX; + setup_new_exec(bprm); /* Set the task size for HP-UX processes such that * the gateway page is outside the address space. 
diff --git a/fs/exec.c b/fs/exec.c index 632b02e34ec7..675c3f44c2ea 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) int flush_old_exec(struct linux_binprm * bprm) { - char * name; - int i, ch, retval; - char tcomm[sizeof(current->comm)]; + int retval; /* * Make sure we have a private signal table and that @@ -963,6 +961,20 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; bprm->mm = NULL; /* We're using it now */ + return 0; + +out: + return retval; +} +EXPORT_SYMBOL(flush_old_exec); + +void setup_new_exec(struct linux_binprm * bprm) +{ + int i, ch; + char * name; + char tcomm[sizeof(current->comm)]; + + arch_pick_mmap_layout(current->mm); /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm) flush_signal_handlers(current, 0); flush_old_files(current->files); - - return 0; - -out: - return retval; } - -EXPORT_SYMBOL(flush_old_exec); +EXPORT_SYMBOL(setup_new_exec); /* * Prepare credentials and lock ->cred_guard_mutex. 
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index cd4349bdc34e..89c6249fc561 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -109,6 +109,7 @@ extern int prepare_binprm(struct linux_binprm *); extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *,struct pt_regs *); extern int flush_old_exec(struct linux_binprm * bprm); +extern void setup_new_exec(struct linux_binprm * bprm); extern int suid_dumpable; #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f7bba93929b..abdfacc58653 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1369,7 +1369,7 @@ struct task_struct { char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) - - initialized normally by flush_old_exec */ + - initialized normally by setup_new_exec */ /* file system info */ int link_count, total_link_count; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From 5352ae638e2d7d5c9b2e4d528676bbf2af6fd6f3 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 28 Jan 2010 17:04:43 -0600 Subject: perf, hw_breakpoint, kgdb: Do not take mutex for kernel debugger This patch fixes the regression in functionality where the kernel debugger and the perf API do not nicely share hw breakpoint reservations. The kernel debugger cannot use any mutex_lock() calls because it can start the kernel running from an invalid context. A mutex free version of the reservation API needed to get created for the kernel debugger to safely update hw breakpoint reservations. The possibility for a breakpoint reservation to be concurrently processed at the time that kgdb interrupts the system is improbable. Should this corner case occur the end user is warned, and the kernel debugger will prohibit updating the hardware breakpoint reservations. 
Any time the kernel debugger reserves a hardware breakpoint it will be a system wide reservation. Signed-off-by: Jason Wessel Acked-by: Frederic Weisbecker Cc: kgdb-bugreport@lists.sourceforge.net Cc: K.Prasad Cc: Peter Zijlstra Cc: Alan Stern Cc: torvalds@linux-foundation.org LKML-Reference: <1264719883-7285-3-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 51 ++++++++++++++++++++++++++++++++++++++++++ include/linux/hw_breakpoint.h | 2 ++ kernel/hw_breakpoint.c | 52 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 95 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 62bea7307eaa..bfba6019d762 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -239,6 +239,49 @@ static void kgdb_correct_hw_break(void) hw_breakpoint_restore(); } +static int hw_break_reserve_slot(int breakno) +{ + int cpu; + int cnt = 0; + struct perf_event **pevent; + + for_each_online_cpu(cpu) { + cnt++; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_reserve_bp_slot(*pevent)) + goto fail; + } + + return 0; + +fail: + for_each_online_cpu(cpu) { + cnt--; + if (!cnt) + break; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + dbg_release_bp_slot(*pevent); + } + return -1; +} + +static int hw_break_release_slot(int breakno) +{ + struct perf_event **pevent; + int cpu; + + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_release_bp_slot(*pevent)) + /* + * The debugger is responisble for handing the retry on + * remove failure. 
+ */ + return -1; + } + return 0; +} + static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { @@ -250,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) if (i == 4) return -1; + if (hw_break_release_slot(i)) { + printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr); + return -1; + } breakinfo[i].enabled = 0; return 0; @@ -316,6 +363,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) return -1; } breakinfo[i].addr = addr; + if (hw_break_reserve_slot(i)) { + breakinfo[i].addr = 0; + return -1; + } breakinfo[i].enabled = 1; return 0; diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 41235c93e4e9..070ba0621738 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -75,6 +75,8 @@ extern int __register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); +extern int dbg_reserve_bp_slot(struct perf_event *bp); +extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c030ae657f20..8a5c7d55ac9f 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM */ -int reserve_bp_slot(struct perf_event *bp) +static int __reserve_bp_slot(struct perf_event *bp) { struct bp_busy_slots slots = {0}; - int ret = 0; - - mutex_lock(&nr_bp_mutex); fetch_bp_busy_slots(&slots, bp); /* Flexible counters need to keep at least one slot */ - if (slots.pinned + (!!slots.flexible) == HBP_NUM) { - ret = -ENOSPC; - goto end; - } + if 
(slots.pinned + (!!slots.flexible) == HBP_NUM) + return -ENOSPC; toggle_bp_slot(bp, true); -end: + return 0; +} + +int reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + mutex_lock(&nr_bp_mutex); + + ret = __reserve_bp_slot(bp); + mutex_unlock(&nr_bp_mutex); return ret; } +static void __release_bp_slot(struct perf_event *bp) +{ + toggle_bp_slot(bp, false); +} + void release_bp_slot(struct perf_event *bp) { mutex_lock(&nr_bp_mutex); - toggle_bp_slot(bp, false); + __release_bp_slot(bp); mutex_unlock(&nr_bp_mutex); } +/* + * Allow the kernel debugger to reserve breakpoint slots without + * taking a lock using the dbg_* variant of for the reserve and + * release breakpoint slots. + */ +int dbg_reserve_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + return __reserve_bp_slot(bp); +} + +int dbg_release_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + __release_bp_slot(bp); + + return 0; +} int register_perf_hw_breakpoint(struct perf_event *bp) { -- cgit v1.2.3 From d6ad3e286d2c075a60b9f11075a2c55aeeeca2ad Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 27 Jan 2010 16:25:22 -0600 Subject: softlockup: Add sched_clock_tick() to avoid kernel warning on kgdb resume When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, sched_clock() gets the time from hardware such as the TSC on x86. In this configuration kgdb will report a softlock warning message on resuming or detaching from a debug session. Sequence of events in the problem case: 1) "cpu sched clock" and "hardware time" are at 100 sec prior to a call to kgdb_handle_exception() 2) Debugger waits in kgdb_handle_exception() for 80 sec and on exit the following is called ... 
touch_softlockup_watchdog() --> __raw_get_cpu_var(touch_timestamp) = 0; 3) "cpu sched clock" = 100s (it was not updated, because the interrupt was disabled in kgdb) but the "hardware time" = 180 sec 4) The first timer interrupt after resuming from kgdb_handle_exception updates the watchdog from the "cpu sched clock" update_process_times() { ... run_local_timers() --> softlockup_tick() --> check (touch_timestamp == 0) (it is "YES" here, we have set "touch_timestamp = 0" at kgdb) --> __touch_softlockup_watchdog() ***(A)--> reset "touch_timestamp" to "get_timestamp()" (Here, the "touch_timestamp" will still be set to 100s.) ... scheduler_tick() ***(B)--> sched_clock_tick() (update "cpu sched clock" to "hardware time" = 180s) ... } 5) The Second timer interrupt handler appears to have a large jump and trips the softlockup warning. update_process_times() { ... run_local_timers() --> softlockup_tick() --> "cpu sched clock" - "touch_timestamp" = 180s-100s > 60s --> printk "soft lockup error messages" ... } note: ***(A) reset "touch_timestamp" to "get_timestamp(this_cpu)" Why is "touch_timestamp" 100 sec, instead of 180 sec? When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, the call trace of get_timestamp() is: get_timestamp(this_cpu) -->cpu_clock(this_cpu) -->sched_clock_cpu(this_cpu) -->__update_sched_clock(sched_clock_data, now) The __update_sched_clock() function uses the GTOD tick value to create a window to normalize the "now" values. So if "now" value is too big for sched_clock_data, it will be ignored. The fix is to invoke sched_clock_tick() to update "cpu sched clock" in order to recover from this state. This is done by introducing the function touch_softlockup_watchdog_sync(). This allows kgdb to request that the sched clock is updated when the watchdog thread runs the first time after a resume from kgdb. 
[yong.zhang0@gmail.com: Use per cpu instead of an array] Signed-off-by: Jason Wessel Signed-off-by: Dongdong Deng Cc: kgdb-bugreport@lists.sourceforge.net Cc: peterz@infradead.org LKML-Reference: <1264631124-4837-2-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ kernel/kgdb.c | 6 +++--- kernel/softlockup.c | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f7bba93929b..89232151a9d0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -310,6 +310,7 @@ extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); +extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, void __user *buffer, @@ -323,6 +324,9 @@ static inline void softlockup_tick(void) static inline void touch_softlockup_watchdog(void) { } +static inline void touch_softlockup_watchdog_sync(void) +{ +} static inline void touch_all_softlockup_watchdogs(void) { } diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 2eb517e23514..87f2cc557553 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -596,7 +596,7 @@ static void kgdb_wait(struct pt_regs *regs) /* Signal the primary CPU that we are done: */ atomic_set(&cpu_in_kgdb[cpu], 0); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); } @@ -1450,7 +1450,7 @@ acquirelock: (kgdb_info[cpu].task && kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); @@ -1550,7 +1550,7 @@ kgdb_restore: } /* Free kgdb_active */ atomic_set(&kgdb_active, -1); - 
touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d22579087e27..0d4c7898ab80 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock); static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); +static DEFINE_PER_CPU(bool, softlock_touch_sync); static int __read_mostly did_panic; int __read_mostly softlockup_thresh = 60; @@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void) } EXPORT_SYMBOL(touch_softlockup_watchdog); +void touch_softlockup_watchdog_sync(void) +{ + __raw_get_cpu_var(softlock_touch_sync) = true; + __raw_get_cpu_var(softlockup_touch_ts) = 0; +} + void touch_all_softlockup_watchdogs(void) { int cpu; @@ -118,6 +125,14 @@ void softlockup_tick(void) } if (touch_ts == 0) { + if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) { + /* + * If the time stamp was touched atomically + * make sure the scheduler tick is up to date. + */ + per_cpu(softlock_touch_sync, this_cpu) = false; + sched_clock_tick(); + } __touch_softlockup_watchdog(); return; } -- cgit v1.2.3 From cd757645fbdc34a8343c04bb0e74e06fccc2cb10 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Sat, 30 Jan 2010 10:25:18 +0530 Subject: perf: Make bp_len type to u64 generic across the arch Change 'bp_len' type to __u64 to make it work across archs as the s390 architecture watch point length can be up to 2^64. reference: http://lkml.org/lkml/2010/1/25/212 This is an ABI change that is not backward compatible with the previous hardware breakpoint info layout integrated in this development cycle, a rebuild of perf tools is necessary for versions based on 2.6.33-rc1 - 2.6.33-rc6 to work with a kernel based on this patch.
Signed-off-by: Mahesh Salgaonkar Acked-by: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: "K. Prasad" Cc: Maneesh Soni Cc: Heiko Carstens Cc: Martin LKML-Reference: <20100130045518.GA20776@in.ibm.com> Signed-off-by: Frederic Weisbecker --- include/linux/hw_breakpoint.h | 2 +- include/linux/perf_event.h | 6 ++---- kernel/hw_breakpoint.c | 2 +- kernel/perf_event.c | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 070ba0621738..5977b724f7c6 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -44,7 +44,7 @@ static inline int hw_breakpoint_type(struct perf_event *bp) return bp->attr.bp_type; } -static inline int hw_breakpoint_len(struct perf_event *bp) +static inline unsigned long hw_breakpoint_len(struct perf_event *bp) { return bp->attr.bp_len; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8fa71874113f..a177698d95e2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -211,11 +211,9 @@ struct perf_event_attr { __u32 wakeup_watermark; /* bytes before wakeup */ }; - __u32 __reserved_2; - - __u64 bp_addr; __u32 bp_type; - __u32 bp_len; + __u64 bp_addr; + __u64 bp_len; }; /* diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 8a5c7d55ac9f..967e66143e11 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -360,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { u64 old_addr = bp->attr.bp_addr; + u64 old_len = bp->attr.bp_len; int old_type = bp->attr.bp_type; - int old_len = bp->attr.bp_len; int err = 0; perf_event_disable(bp); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index d27746bd3a06..2b19297742cb 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4580,7 +4580,7 @@ static int perf_copy_attr(struct perf_event_attr __user 
*uattr, if (attr->type >= PERF_TYPE_MAX) return -EINVAL; - if (attr->__reserved_1 || attr->__reserved_2) + if (attr->__reserved_1) return -EINVAL; if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) -- cgit v1.2.3 From f7acede65d6b65919aee5b6a360a17cedb11f2f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Jan 2010 13:30:11 +0100 Subject: libata: fix ata_id_logical_per_physical_sectors The value we get from the low byte of the ATA_ID_SECTOR_SIZE word is not a plain multiple, but the log of it, so fix the helper to give the correct answer. Without this we'll get an incorrect minimal I/O size in the block limits VPD page for 4k sector drives. Also change the return value of ata_id_logical_per_physical_sectors to u16 for the unlikely case of very large logical sectors. Signed-off-by: Christoph Hellwig Signed-off-by: Jeff Garzik --- include/linux/ata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 38a6948ce0c2..20f31567ccee 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -647,9 +647,9 @@ static inline int ata_id_has_large_logical_sectors(const u16 *id) return id[ATA_ID_SECTOR_SIZE] & (1 << 13); } -static inline u8 ata_id_logical_per_physical_sectors(const u16 *id) +static inline u16 ata_id_logical_per_physical_sectors(const u16 *id) { - return id[ATA_ID_SECTOR_SIZE] & 0xf; + return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf); } static inline int ata_id_has_lba48(const u16 *id) -- cgit v1.2.3 From 2938429501b73f6aeb312236eac7ed0416a07cd5 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 5 Feb 2010 16:09:11 +1100 Subject: percpu: add __percpu for sparse This is to make the annotation of percpu variables during the next merge window less painful. Extracted from a patch by Rusty Russell.
Signed-off-by: Stephen Rothwell Acked-by: Tejun Heo Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 5be3dab4a695..188fcae10a99 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -15,6 +15,7 @@ # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) +# define __percpu __attribute__((noderef, address_space(3))) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else @@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __acquire(x) (void)0 # define __release(x) (void)0 # define __cond_lock(x,c) (c) +# define __percpu #endif #ifdef __KERNEL__ -- cgit v1.2.3 From 8eb988c70e7709b7bd1a69f0ec53d19ac20dea84 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 20 Jan 2010 15:35:41 -0500 Subject: fix ima breakage The "Untangling ima mess, part 2 with counters" patch messed up the counters. Based on conversations with Al Viro, this patch streamlines ima_path_check() by removing the counter maintenance. The counters are now updated independently, from measuring the file, in __dentry_open() and alloc_file() by calling ima_counts_get(). ima_path_check() is called from nfsd and do_filp_open(). It also did not measure all files that should have been measured. Reason: ima_path_check() got bogus value passed as mask.
[AV: mea culpa] [AV: add missing nfsd bits] Signed-off-by: Mimi Zohar Signed-off-by: Al Viro --- fs/namei.c | 6 +- fs/nfsd/vfs.c | 3 +- include/linux/ima.h | 4 +- security/integrity/ima/ima_main.c | 236 +++++++++++++++----------------------- 4 files changed, 97 insertions(+), 152 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 94a5e60779f9..cd77b6375efd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1736,8 +1736,7 @@ do_last: if (nd.root.mnt) path_put(&nd.root); if (!IS_ERR(filp)) { - error = ima_path_check(&filp->f_path, filp->f_mode & - (MAY_READ | MAY_WRITE | MAY_EXEC)); + error = ima_path_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); @@ -1797,8 +1796,7 @@ ok: } filp = nameidata_to_filp(&nd); if (!IS_ERR(filp)) { - error = ima_path_check(&filp->f_path, filp->f_mode & - (MAY_READ | MAY_WRITE | MAY_EXEC)); + error = ima_path_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 325959e264ce..32477e3a645c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -752,8 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); - host_err = ima_path_check(&(*filp)->f_path, - access & (MAY_READ | MAY_WRITE | MAY_EXEC)); + host_err = ima_path_check(*filp, access); out_nfserr: err = nfserrno(host_err); out: diff --git a/include/linux/ima.h b/include/linux/ima.h index 99dc6d5cf7e5..aa55a8f1f5b9 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -17,7 +17,7 @@ struct linux_binprm; extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct path *path, int mask); +extern int ima_path_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern 
void ima_counts_get(struct file *file); @@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct path *path, int mask) +static inline int ima_path_check(struct file *file, int mask) { return 0; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index a89f44d5e030..75aee18f6163 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -84,6 +84,36 @@ out: return found; } +/* ima_read_write_check - reflect possible reading/writing errors in the PCR. + * + * When opening a file for read, if the file is already open for write, + * the file could change, resulting in a file measurement error. + * + * Opening a file for write, if the file is already open for read, results + * in a time of measure, time of use (ToMToU) error. + * + * In either case invalidate the PCR. + */ +enum iint_pcr_error { TOMTOU, OPEN_WRITERS }; +static void ima_read_write_check(enum iint_pcr_error error, + struct ima_iint_cache *iint, + struct inode *inode, + const unsigned char *filename) +{ + switch (error) { + case TOMTOU: + if (iint->readcount > 0) + ima_add_violation(inode, filename, "invalid_pcr", + "ToMToU"); + break; + case OPEN_WRITERS: + if (iint->writecount > 0) + ima_add_violation(inode, filename, "invalid_pcr", + "open_writers"); + break; + } +} + /* * Update the counts given an fmode_t */ @@ -98,6 +128,47 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode) iint->writecount++; } +/* + * ima_counts_get - increment file counts + * + * Maintain read/write counters for all files, but only + * invalidate the PCR for measured files: + * - Opening a file for write when already open for read, + * results in a time of measure, time of use (ToMToU) error. + * - Opening a file for read when already open for write, + * could result in a file measurement error. 
+ * + */ +void ima_counts_get(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + fmode_t mode = file->f_mode; + struct ima_iint_cache *iint; + int rc; + + if (!ima_initialized || !S_ISREG(inode->i_mode)) + return; + iint = ima_iint_find_get(inode); + if (!iint) + return; + mutex_lock(&iint->mutex); + rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); + if (rc < 0) + goto out; + + if (mode & FMODE_WRITE) { + ima_read_write_check(TOMTOU, iint, inode, dentry->d_name.name); + goto out; + } + ima_read_write_check(OPEN_WRITERS, iint, inode, dentry->d_name.name); +out: + ima_inc_counts(iint, file->f_mode); + mutex_unlock(&iint->mutex); + + kref_put(&iint->refcount, iint_free); +} + /* * Decrement ima counts */ @@ -153,123 +224,6 @@ void ima_file_free(struct file *file) kref_put(&iint->refcount, iint_free); } -/* ima_read_write_check - reflect possible reading/writing errors in the PCR. - * - * When opening a file for read, if the file is already open for write, - * the file could change, resulting in a file measurement error. - * - * Opening a file for write, if the file is already open for read, results - * in a time of measure, time of use (ToMToU) error. - * - * In either case invalidate the PCR. 
- */ -enum iint_pcr_error { TOMTOU, OPEN_WRITERS }; -static void ima_read_write_check(enum iint_pcr_error error, - struct ima_iint_cache *iint, - struct inode *inode, - const unsigned char *filename) -{ - switch (error) { - case TOMTOU: - if (iint->readcount > 0) - ima_add_violation(inode, filename, "invalid_pcr", - "ToMToU"); - break; - case OPEN_WRITERS: - if (iint->writecount > 0) - ima_add_violation(inode, filename, "invalid_pcr", - "open_writers"); - break; - } -} - -static int get_path_measurement(struct ima_iint_cache *iint, struct file *file, - const unsigned char *filename) -{ - int rc = 0; - - ima_inc_counts(iint, file->f_mode); - - rc = ima_collect_measurement(iint, file); - if (!rc) - ima_store_measurement(iint, file, filename); - return rc; -} - -/** - * ima_path_check - based on policy, collect/store measurement. - * @path: contains a pointer to the path to be measured - * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE - * - * Measure the file being open for readonly, based on the - * ima_must_measure() policy decision. - * - * Keep read/write counters for all files, but only - * invalidate the PCR for measured files: - * - Opening a file for write when already open for read, - * results in a time of measure, time of use (ToMToU) error. - * - Opening a file for read when already open for write, - * could result in a file measurement error. - * - * Always return 0 and audit dentry_open failures. - * (Return code will be based upon measurement appraisal.) 
- */ -int ima_path_check(struct path *path, int mask) -{ - struct inode *inode = path->dentry->d_inode; - struct ima_iint_cache *iint; - struct file *file = NULL; - int rc; - - if (!ima_initialized || !S_ISREG(inode->i_mode)) - return 0; - iint = ima_iint_find_get(inode); - if (!iint) - return 0; - - mutex_lock(&iint->mutex); - - rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); - if (rc < 0) - goto out; - - if ((mask & MAY_WRITE) || (mask == 0)) - ima_read_write_check(TOMTOU, iint, inode, - path->dentry->d_name.name); - - if ((mask & (MAY_WRITE | MAY_READ | MAY_EXEC)) != MAY_READ) - goto out; - - ima_read_write_check(OPEN_WRITERS, iint, inode, - path->dentry->d_name.name); - if (!(iint->flags & IMA_MEASURED)) { - struct dentry *dentry = dget(path->dentry); - struct vfsmount *mnt = mntget(path->mnt); - - file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE, - current_cred()); - if (IS_ERR(file)) { - int audit_info = 0; - - integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, - dentry->d_name.name, - "add_measurement", - "dentry_open failed", - 1, audit_info); - file = NULL; - goto out; - } - rc = get_path_measurement(iint, file, dentry->d_name.name); - } -out: - mutex_unlock(&iint->mutex); - if (file) - fput(file); - kref_put(&iint->refcount, iint_free); - return 0; -} -EXPORT_SYMBOL_GPL(ima_path_check); - static int process_measurement(struct file *file, const unsigned char *filename, int mask, int function) { @@ -297,33 +251,6 @@ out: return rc; } -/* - * ima_counts_get - increment file counts - * - * - for IPC shm and shmat file. - * - for nfsd exported files. - * - * Increment the counts for these files to prevent unnecessary - * imbalance messages. 
- */ -void ima_counts_get(struct file *file) -{ - struct inode *inode = file->f_dentry->d_inode; - struct ima_iint_cache *iint; - - if (!ima_initialized || !S_ISREG(inode->i_mode)) - return; - iint = ima_iint_find_get(inode); - if (!iint) - return; - mutex_lock(&iint->mutex); - ima_inc_counts(iint, file->f_mode); - mutex_unlock(&iint->mutex); - - kref_put(&iint->refcount, iint_free); -} -EXPORT_SYMBOL_GPL(ima_counts_get); - /** * ima_file_mmap - based on policy, collect/store measurement. * @file: pointer to the file to be measured (May be NULL) @@ -369,6 +296,27 @@ int ima_bprm_check(struct linux_binprm *bprm) return 0; } +/** + * ima_path_check - based on policy, collect/store measurement. + * @file: pointer to the file to be measured + * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE + * + * Measure files based on the ima_must_measure() policy decision. + * + * Always return 0 and audit dentry_open failures. + * (Return code will be based upon measurement appraisal.) + */ +int ima_path_check(struct file *file, int mask) +{ + int rc; + + rc = process_measurement(file, file->f_dentry->d_name.name, + mask & (MAY_READ | MAY_WRITE | MAY_EXEC), + PATH_CHECK); + return 0; +} +EXPORT_SYMBOL_GPL(ima_path_check); + static int __init init_ima(void) { int error; -- cgit v1.2.3 From 9bbb6cad0173e6220f3ac609e26beb48dab3b7cd Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 26 Jan 2010 17:02:40 -0500 Subject: ima: rename ima_path_check to ima_file_check ima_path_check actually deals with files! call it ima_file_check instead. 
Signed-off-by: Eric Paris Acked-by: Mimi Zohar Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- fs/nfsd/vfs.c | 2 +- include/linux/ima.h | 4 ++-- security/integrity/ima/ima_main.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index cd77b6375efd..d62fdc875f22 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1736,7 +1736,7 @@ do_last: if (nd.root.mnt) path_put(&nd.root); if (!IS_ERR(filp)) { - error = ima_path_check(filp, acc_mode); + error = ima_file_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); @@ -1796,7 +1796,7 @@ ok: } filp = nameidata_to_filp(&nd); if (!IS_ERR(filp)) { - error = ima_path_check(filp, acc_mode); + error = ima_file_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 32477e3a645c..97d79eff6b7f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -752,7 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); - host_err = ima_path_check(*filp, access); + host_err = ima_file_check(*filp, access); out_nfserr: err = nfserrno(host_err); out: diff --git a/include/linux/ima.h b/include/linux/ima.h index aa55a8f1f5b9..975837e7d6c0 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -17,7 +17,7 @@ struct linux_binprm; extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct file *file, int mask); +extern int ima_file_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern void ima_counts_get(struct file *file); @@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct file *file, int mask) +static inline int 
ima_file_check(struct file *file, int mask) { return 0; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index eb1cf6498cc9..b76e1f03ea2b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -14,7 +14,7 @@ * * File: ima_main.c * implements the IMA hooks: ima_bprm_check, ima_file_mmap, - * and ima_path_check. + * and ima_file_check. */ #include #include @@ -306,7 +306,7 @@ int ima_bprm_check(struct linux_binprm *bprm) * Always return 0 and audit dentry_open failures. * (Return code will be based upon measurement appraisal.) */ -int ima_path_check(struct file *file, int mask) +int ima_file_check(struct file *file, int mask) { int rc; @@ -315,7 +315,7 @@ int ima_path_check(struct file *file, int mask) PATH_CHECK); return 0; } -EXPORT_SYMBOL_GPL(ima_path_check); +EXPORT_SYMBOL_GPL(ima_file_check); static int __init init_ima(void) { -- cgit v1.2.3 From 5a5e0f4c7038168e38d1db6af09d1ac715ee9888 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Jan 2010 17:09:38 +0300 Subject: kfifo: Don't use integer as NULL pointer This patch fixes the following sparse warnings: include/linux/kfifo.h:127:25: warning: Using plain integer as NULL pointer kernel/kfifo.c:83:21: warning: Using plain integer as NULL pointer Signed-off-by: Anton Vorontsov Acked-by: Stefani Seibold Signed-off-by: Greg Kroah-Hartman --- include/linux/kfifo.h | 2 +- kernel/kfifo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 6f6c5f300af6..bc0fc795bd35 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -124,7 +124,7 @@ extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo, */ static inline bool kfifo_initialized(struct kfifo *fifo) { - return fifo->buffer != 0; + return fifo->buffer != NULL; } /** diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 559fb5582b60..35edbe22e9a9 100644 --- a/kernel/kfifo.c +++
b/kernel/kfifo.c @@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask) buffer = kmalloc(size, gfp_mask); if (!buffer) { - _kfifo_init(fifo, 0, 0); + _kfifo_init(fifo, NULL, 0); return -ENOMEM; } -- cgit v1.2.3