diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-06-13 03:29:53 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-06-13 03:29:53 +0400 |
commit | a568fa1c91d4f0e565345119d0b942cc7f74ca3d (patch) | |
tree | 8d57522244da002e44b2f79cc2b376799630ba83 | |
parent | 1a9c3d68d65f4b5ce32f7d67ccc730396e04cdd2 (diff) | |
parent | c2853c8df57f49620d26f317d7d43347c29bfc2e (diff) | |
download | linux-a568fa1c91d4f0e565345119d0b942cc7f74ca3d.tar.xz |
Merge branch 'akpm' (updates from Andrew Morton)
Merge misc fixes from Andrew Morton:
"Bunch of fixes and one little addition to math64.h"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (27 commits)
include/linux/math64.h: add div64_ul()
mm: memcontrol: fix lockless reclaim hierarchy iterator
frontswap: fix incorrect zeroing and allocation size for frontswap_map
kernel/audit_tree.c:audit_add_tree_rule(): protect `rule' from kill_rules()
mm: migration: add migrate_entry_wait_huge()
ocfs2: add missing lockres put in dlm_mig_lockres_handler
mm/page_alloc.c: fix watermark check in __zone_watermark_ok()
drivers/misc/sgi-gru/grufile.c: fix info leak in gru_get_config_info()
aio: fix io_destroy() regression by using call_rcu()
rtc-at91rm9200: use shadow IMR on at91sam9x5
rtc-at91rm9200: add shadow interrupt mask
rtc-at91rm9200: refactor interrupt-register handling
rtc-at91rm9200: add configuration support
rtc-at91rm9200: add match-table compile guard
fs/ocfs2/namei.c: remove unecessary ERROR when removing non-empty directory
swap: avoid read_swap_cache_async() race to deadlock while waiting on discard I/O completion
drivers/rtc/rtc-twl.c: fix missing device_init_wakeup() when booted with device tree
cciss: fix broken mutex usage in ioctl
audit: wait_for_auditd() should use TASK_UNINTERRUPTIBLE
drivers/rtc/rtc-cmos.c: fix accidentally enabling rtc channel
...
-rw-r--r-- | Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt | 2 | ||||
-rw-r--r-- | drivers/block/cciss.c | 32 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grufile.c | 1 | ||||
-rw-r--r-- | drivers/rtc/rtc-at91rm9200.c | 131 | ||||
-rw-r--r-- | drivers/rtc/rtc-cmos.c | 4 | ||||
-rw-r--r-- | drivers/rtc/rtc-tps6586x.c | 3 | ||||
-rw-r--r-- | drivers/rtc/rtc-twl.c | 1 | ||||
-rw-r--r-- | fs/aio.c | 36 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 1 | ||||
-rw-r--r-- | fs/ocfs2/namei.c | 4 | ||||
-rw-r--r-- | fs/proc/kmsg.c | 10 | ||||
-rw-r--r-- | include/linux/cpu.h | 4 | ||||
-rw-r--r-- | include/linux/math64.h | 6 | ||||
-rw-r--r-- | include/linux/swapops.h | 3 | ||||
-rw-r--r-- | include/linux/syslog.h | 4 | ||||
-rw-r--r-- | kernel/audit.c | 2 | ||||
-rw-r--r-- | kernel/audit_tree.c | 1 | ||||
-rw-r--r-- | kernel/cpu.c | 55 | ||||
-rw-r--r-- | kernel/printk.c | 91 | ||||
-rw-r--r-- | kernel/sys.c | 29 | ||||
-rw-r--r-- | lib/mpi/mpicoder.c | 2 | ||||
-rw-r--r-- | mm/frontswap.c | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 14 | ||||
-rw-r--r-- | mm/migrate.c | 23 | ||||
-rw-r--r-- | mm/page_alloc.c | 6 | ||||
-rw-r--r-- | mm/swap_state.c | 18 | ||||
-rw-r--r-- | mm/swapfile.c | 2 |
28 files changed, 321 insertions, 168 deletions
diff --git a/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt index 2a3feabd3b22..34c1505774bf 100644 --- a/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt +++ b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt @@ -1,7 +1,7 @@ Atmel AT91RM9200 Real Time Clock Required properties: -- compatible: should be: "atmel,at91rm9200-rtc" +- compatible: should be: "atmel,at91rm9200-rtc" or "atmel,at91sam9x5-rtc" - reg: physical base address of the controller and length of memory mapped region. - interrupts: rtc alarm/event interrupt diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 6374dc103521..62b6c2cc80b5 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -168,8 +168,6 @@ static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id); static int cciss_open(struct block_device *bdev, fmode_t mode); static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode); static void cciss_release(struct gendisk *disk, fmode_t mode); -static int do_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg); static int cciss_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo); @@ -235,7 +233,7 @@ static const struct block_device_operations cciss_fops = { .owner = THIS_MODULE, .open = cciss_unlocked_open, .release = cciss_release, - .ioctl = do_ioctl, + .ioctl = cciss_ioctl, .getgeo = cciss_getgeo, #ifdef CONFIG_COMPAT .compat_ioctl = cciss_compat_ioctl, @@ -1143,16 +1141,6 @@ static void cciss_release(struct gendisk *disk, fmode_t mode) mutex_unlock(&cciss_mutex); } -static int do_ioctl(struct block_device *bdev, fmode_t mode, - unsigned cmd, unsigned long arg) -{ - int ret; - mutex_lock(&cciss_mutex); - ret = cciss_ioctl(bdev, mode, cmd, arg); - mutex_unlock(&cciss_mutex); - return ret; -} - #ifdef CONFIG_COMPAT static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode, @@ -1179,7 +1167,7 @@ static int cciss_compat_ioctl(struct block_device *bdev, fmode_t mode, case CCISS_REGNEWD: case CCISS_RESCANDISK: case CCISS_GETLUNINFO: - return do_ioctl(bdev, mode, cmd, arg); + return cciss_ioctl(bdev, mode, cmd, arg); case CCISS_PASSTHRU32: return cciss_ioctl32_passthru(bdev, mode, cmd, arg); @@ -1219,7 +1207,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode, if (err) return -EFAULT; - err = do_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p); + err = cciss_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p); if (err) return err; err |= @@ -1261,7 +1249,7 @@ static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode, if (err) return -EFAULT; - err = do_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p); + err = cciss_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p); if (err) return err; err |= @@ -1311,11 +1299,14 @@ static int cciss_getpciinfo(ctlr_info_t *h, void __user *argp) static int cciss_getintinfo(ctlr_info_t *h, void __user *argp) { cciss_coalint_struct intinfo; + unsigned long flags; if (!argp) return -EINVAL; + spin_lock_irqsave(&h->lock, flags); intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay); intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount); + spin_unlock_irqrestore(&h->lock, flags); if (copy_to_user (argp, &intinfo, sizeof(cciss_coalint_struct))) return -EFAULT; @@ -1356,12 +1347,15 @@ static int cciss_setintinfo(ctlr_info_t *h, void __user *argp) static int cciss_getnodename(ctlr_info_t *h, void __user *argp) { NodeName_type NodeName; + unsigned long flags; int i; if (!argp) return -EINVAL; + spin_lock_irqsave(&h->lock, flags); for (i = 0; i < 16; i++) NodeName[i] = readb(&h->cfgtable->ServerName[i]); + spin_unlock_irqrestore(&h->lock, flags); if (copy_to_user(argp, NodeName, sizeof(NodeName_type))) return -EFAULT; return 0; @@ -1398,10 +1392,13 @@ static int cciss_setnodename(ctlr_info_t *h, void __user *argp) static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp) { Heartbeat_type heartbeat; + unsigned long flags; if (!argp) return -EINVAL; + spin_lock_irqsave(&h->lock, flags); heartbeat = readl(&h->cfgtable->HeartBeat); + spin_unlock_irqrestore(&h->lock, flags); if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type))) return -EFAULT; return 0; @@ -1410,10 +1407,13 @@ static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp) static int cciss_getbustypes(ctlr_info_t *h, void __user *argp) { BusTypes_type BusTypes; + unsigned long flags; if (!argp) return -EINVAL; + spin_lock_irqsave(&h->lock, flags); BusTypes = readl(&h->cfgtable->BusTypes); + spin_unlock_irqrestore(&h->lock, flags); if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type))) return -EFAULT; return 0; diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index 44d273c5e19d..0535d1e0bc78 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -172,6 +172,7 @@ static long gru_get_config_info(unsigned long arg) nodesperblade = 2; else nodesperblade = 1; + memset(&info, 0, sizeof(info)); info.cpus = num_online_cpus(); info.nodes = num_online_nodes(); info.blades = info.nodes / nodesperblade; diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 0eab77b22340..f296f3f7db9b 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -25,6 +25,7 @@ #include <linux/rtc.h> #include <linux/bcd.h> #include <linux/interrupt.h> +#include <linux/spinlock.h> #include <linux/ioctl.h> #include <linux/completion.h> #include <linux/io.h> @@ -42,10 +43,65 @@ #define AT91_RTC_EPOCH 1900UL /* just like arch/arm/common/rtctime.c */ +struct at91_rtc_config { + bool use_shadow_imr; +}; + +static const struct at91_rtc_config *at91_rtc_config; static DECLARE_COMPLETION(at91_rtc_updated); static unsigned int at91_alarm_year = AT91_RTC_EPOCH; static void __iomem *at91_rtc_regs; static int irq; +static DEFINE_SPINLOCK(at91_rtc_lock); +static u32 at91_rtc_shadow_imr; + +static void at91_rtc_write_ier(u32 mask) +{ + unsigned long flags; + + spin_lock_irqsave(&at91_rtc_lock, flags); + at91_rtc_shadow_imr |= mask; + at91_rtc_write(AT91_RTC_IER, mask); + spin_unlock_irqrestore(&at91_rtc_lock, flags); +} + +static void at91_rtc_write_idr(u32 mask) +{ + unsigned long flags; + + spin_lock_irqsave(&at91_rtc_lock, flags); + at91_rtc_write(AT91_RTC_IDR, mask); + /* + * Register read back (of any RTC-register) needed to make sure + * IDR-register write has reached the peripheral before updating + * shadow mask. + * + * Note that there is still a possibility that the mask is updated + * before interrupts have actually been disabled in hardware. The only + * way to be certain would be to poll the IMR-register, which is is + * the very register we are trying to emulate. The register read back + * is a reasonable heuristic. + */ + at91_rtc_read(AT91_RTC_SR); + at91_rtc_shadow_imr &= ~mask; + spin_unlock_irqrestore(&at91_rtc_lock, flags); +} + +static u32 at91_rtc_read_imr(void) +{ + unsigned long flags; + u32 mask; + + if (at91_rtc_config->use_shadow_imr) { + spin_lock_irqsave(&at91_rtc_lock, flags); + mask = at91_rtc_shadow_imr; + spin_unlock_irqrestore(&at91_rtc_lock, flags); + } else { + mask = at91_rtc_read(AT91_RTC_IMR); + } + + return mask; +} /* * Decode time/date into rtc_time structure @@ -110,9 +166,9 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm) cr = at91_rtc_read(AT91_RTC_CR); at91_rtc_write(AT91_RTC_CR, cr | AT91_RTC_UPDCAL | AT91_RTC_UPDTIM); - at91_rtc_write(AT91_RTC_IER, AT91_RTC_ACKUPD); + at91_rtc_write_ier(AT91_RTC_ACKUPD); wait_for_completion(&at91_rtc_updated); /* wait for ACKUPD interrupt */ - at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD); + at91_rtc_write_idr(AT91_RTC_ACKUPD); at91_rtc_write(AT91_RTC_TIMR, bin2bcd(tm->tm_sec) << 0 @@ -144,7 +200,7 @@ static int at91_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm) tm->tm_yday = rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year); tm->tm_year = at91_alarm_year - 1900; - alrm->enabled = (at91_rtc_read(AT91_RTC_IMR) & AT91_RTC_ALARM) + alrm->enabled = (at91_rtc_read_imr() & AT91_RTC_ALARM) ? 1 : 0; dev_dbg(dev, "%s(): %4d-%02d-%02d %02d:%02d:%02d\n", __func__, @@ -169,7 +225,7 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) tm.tm_min = alrm->time.tm_min; tm.tm_sec = alrm->time.tm_sec; - at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ALARM); + at91_rtc_write_idr(AT91_RTC_ALARM); at91_rtc_write(AT91_RTC_TIMALR, bin2bcd(tm.tm_sec) << 0 | bin2bcd(tm.tm_min) << 8 @@ -182,7 +238,7 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) if (alrm->enabled) { at91_rtc_write(AT91_RTC_SCCR, AT91_RTC_ALARM); - at91_rtc_write(AT91_RTC_IER, AT91_RTC_ALARM); + at91_rtc_write_ier(AT91_RTC_ALARM); } dev_dbg(dev, "%s(): %4d-%02d-%02d %02d:%02d:%02d\n", __func__, @@ -198,9 +254,9 @@ static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) if (enabled) { at91_rtc_write(AT91_RTC_SCCR, AT91_RTC_ALARM); - at91_rtc_write(AT91_RTC_IER, AT91_RTC_ALARM); + at91_rtc_write_ier(AT91_RTC_ALARM); } else - at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ALARM); + at91_rtc_write_idr(AT91_RTC_ALARM); return 0; } @@ -209,7 +265,7 @@ static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) */ static int at91_rtc_proc(struct device *dev, struct seq_file *seq) { - unsigned long imr = at91_rtc_read(AT91_RTC_IMR); + unsigned long imr = at91_rtc_read_imr(); seq_printf(seq, "update_IRQ\t: %s\n", (imr & AT91_RTC_ACKUPD) ? "yes" : "no"); @@ -229,7 +285,7 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id) unsigned int rtsr; unsigned long events = 0; - rtsr = at91_rtc_read(AT91_RTC_SR) & at91_rtc_read(AT91_RTC_IMR); + rtsr = at91_rtc_read(AT91_RTC_SR) & at91_rtc_read_imr(); if (rtsr) { /* this interrupt is shared! Is it ours? */ if (rtsr & AT91_RTC_ALARM) events |= (RTC_AF | RTC_IRQF); @@ -250,6 +306,43 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id) return IRQ_NONE; /* not handled */ } +static const struct at91_rtc_config at91rm9200_config = { +}; + +static const struct at91_rtc_config at91sam9x5_config = { + .use_shadow_imr = true, +}; + +#ifdef CONFIG_OF +static const struct of_device_id at91_rtc_dt_ids[] = { + { + .compatible = "atmel,at91rm9200-rtc", + .data = &at91rm9200_config, + }, { + .compatible = "atmel,at91sam9x5-rtc", + .data = &at91sam9x5_config, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, at91_rtc_dt_ids); +#endif + +static const struct at91_rtc_config * +at91_rtc_get_config(struct platform_device *pdev) +{ + const struct of_device_id *match; + + if (pdev->dev.of_node) { + match = of_match_node(at91_rtc_dt_ids, pdev->dev.of_node); + if (!match) + return NULL; + return (const struct at91_rtc_config *)match->data; + } + + return &at91rm9200_config; +} + static const struct rtc_class_ops at91_rtc_ops = { .read_time = at91_rtc_readtime, .set_time = at91_rtc_settime, @@ -268,6 +361,10 @@ static int __init at91_rtc_probe(struct platform_device *pdev) struct resource *regs; int ret = 0; + at91_rtc_config = at91_rtc_get_config(pdev); + if (!at91_rtc_config) + return -ENODEV; + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!regs) { dev_err(&pdev->dev, "no mmio resource defined\n"); @@ -290,7 +387,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev) at91_rtc_write(AT91_RTC_MR, 0); /* 24 hour mode */ /* Disable all interrupts */ - at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM | + at91_rtc_write_idr(AT91_RTC_ACKUPD | AT91_RTC_ALARM | AT91_RTC_SECEV | AT91_RTC_TIMEV | AT91_RTC_CALEV); @@ -335,7 +432,7 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) struct rtc_device *rtc = platform_get_drvdata(pdev); /* Disable all interrupts */ - at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM | + at91_rtc_write_idr(AT91_RTC_ACKUPD | AT91_RTC_ALARM | AT91_RTC_SECEV | AT91_RTC_TIMEV | AT91_RTC_CALEV); free_irq(irq, pdev); @@ -358,13 +455,13 @@ static int at91_rtc_suspend(struct device *dev) /* this IRQ is shared with DBGU and other hardware which isn't * necessarily doing PM like we are... */ - at91_rtc_imr = at91_rtc_read(AT91_RTC_IMR) + at91_rtc_imr = at91_rtc_read_imr() & (AT91_RTC_ALARM|AT91_RTC_SECEV); if (at91_rtc_imr) { if (device_may_wakeup(dev)) enable_irq_wake(irq); else - at91_rtc_write(AT91_RTC_IDR, at91_rtc_imr); + at91_rtc_write_idr(at91_rtc_imr); } return 0; } @@ -375,7 +472,7 @@ static int at91_rtc_resume(struct device *dev) if (device_may_wakeup(dev)) disable_irq_wake(irq); else - at91_rtc_write(AT91_RTC_IER, at91_rtc_imr); + at91_rtc_write_ier(at91_rtc_imr); } return 0; } @@ -383,12 +480,6 @@ static int at91_rtc_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume); -static const struct of_device_id at91_rtc_dt_ids[] = { - { .compatible = "atmel,at91rm9200-rtc" }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, at91_rtc_dt_ids); - static struct platform_driver at91_rtc_driver = { .remove = __exit_p(at91_rtc_remove), .driver = { diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index cc5bea9c4b1c..f1cb706445c7 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -854,6 +854,9 @@ static int cmos_resume(struct device *dev) } spin_lock_irq(&rtc_lock); + if (device_may_wakeup(dev)) + hpet_rtc_timer_init(); + do { CMOS_WRITE(tmp, RTC_CONTROL); hpet_set_rtc_irq_bit(tmp & RTC_IRQMASK); @@ -869,7 +872,6 @@ static int cmos_resume(struct device *dev) rtc_update_irq(cmos->rtc, 1, mask); tmp &= ~RTC_AIE; hpet_mask_rtc_irq_bit(RTC_AIE); - hpet_rtc_timer_init(); } while (mask & RTC_AIE); spin_unlock_irq(&rtc_lock); } diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index 459c2ffc95a6..426901cef14f 100644 --- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -273,6 +273,8 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) return ret; } + device_init_wakeup(&pdev->dev, 1); + platform_set_drvdata(pdev, rtc); rtc->rtc = devm_rtc_device_register(&pdev->dev, dev_name(&pdev->dev), &tps6586x_rtc_ops, THIS_MODULE); @@ -292,7 +294,6 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) goto fail_rtc_register; } disable_irq(rtc->irq); - device_set_wakeup_capable(&pdev->dev, 1); return 0; fail_rtc_register: diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 8751a5240c99..b2eab34f38d9 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -524,6 +524,7 @@ static int twl_rtc_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, rtc); + device_init_wakeup(&pdev->dev, 1); return 0; out2: @@ -141,9 +141,6 @@ static void aio_free_ring(struct kioctx *ctx) for (i = 0; i < ctx->nr_pages; i++) put_page(ctx->ring_pages[i]); - if (ctx->mmap_size) - vm_munmap(ctx->mmap_base, ctx->mmap_size); - if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) kfree(ctx->ring_pages); } @@ -322,11 +319,6 @@ static void free_ioctx(struct kioctx *ctx) aio_free_ring(ctx); - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - ctx->max_reqs > aio_nr); - aio_nr -= ctx->max_reqs; - spin_unlock(&aio_nr_lock); - pr_debug("freeing %p\n", ctx); /* @@ -435,17 +427,24 @@ static void kill_ioctx(struct kioctx *ctx) { if (!atomic_xchg(&ctx->dead, 1)) { hlist_del_rcu(&ctx->list); - /* Between hlist_del_rcu() and dropping the initial ref */ - synchronize_rcu(); /* - * We can't punt to workqueue here because put_ioctx() -> - * free_ioctx() will unmap the ringbuffer, and that has to be - * done in the original process's context. kill_ioctx_rcu/work() - * exist for exit_aio(), as in that path free_ioctx() won't do - * the unmap. + * It'd be more correct to do this in free_ioctx(), after all + * the outstanding kiocbs have finished - but by then io_destroy + * has already returned, so io_setup() could potentially return + * -EAGAIN with no ioctxs actually in use (as far as userspace + * could tell). */ - kill_ioctx_work(&ctx->rcu_work); + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - ctx->max_reqs > aio_nr); + aio_nr -= ctx->max_reqs; + spin_unlock(&aio_nr_lock); + + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); + + /* Between hlist_del_rcu() and dropping the initial ref */ + call_rcu(&ctx->rcu_head, kill_ioctx_rcu); } } @@ -495,10 +494,7 @@ void exit_aio(struct mm_struct *mm) */ ctx->mmap_size = 0; - if (!atomic_xchg(&ctx->dead, 1)) { - hlist_del_rcu(&ctx->list); - call_rcu(&ctx->rcu_head, kill_ioctx_rcu); - } + kill_ioctx(ctx); } } diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index b3fdd1a323d6..e68588e6b1e8 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1408,6 +1408,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, mres->lockname_len, mres->lockname); ret = -EFAULT; spin_unlock(&res->spinlock); + dlm_lockres_put(res); goto leave; } res->state |= DLM_LOCK_RES_MIGRATING; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 04ee1b57c243..b4a5cdf9dbc5 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -947,7 +947,7 @@ leave: ocfs2_free_dir_lookup_result(&orphan_insert); ocfs2_free_dir_lookup_result(&lookup); - if (status) + if (status && (status != -ENOTEMPTY)) mlog_errno(status); return status; @@ -2216,7 +2216,7 @@ out: brelse(orphan_dir_bh); - return 0; + return ret; } int ocfs2_create_inode_in_orphan(struct inode *dir, diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index bd4b5a740ff1..bdfabdaefdce 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -21,12 +21,12 @@ extern wait_queue_head_t log_wait; static int kmsg_open(struct inode * inode, struct file * file) { - return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_PROC); } static int kmsg_release(struct inode * inode, struct file * file) { - (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE); + (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_PROC); return 0; } @@ -34,15 +34,15 @@ static ssize_t kmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { if ((file->f_flags & O_NONBLOCK) && - !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) + !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC)) return -EAGAIN; - return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_PROC); } static unsigned int kmsg_poll(struct file *file, poll_table *wait) { poll_wait(file, &log_wait, wait); - if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) + if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC)) return POLLIN | POLLRDNORM; return 0; } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c6f6e0839b61..9f3c7e81270a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -175,6 +175,8 @@ extern struct bus_type cpu_subsys; extern void get_online_cpus(void); extern void put_online_cpus(void); +extern void cpu_hotplug_disable(void); +extern void cpu_hotplug_enable(void); #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) @@ -198,6 +200,8 @@ static inline void cpu_hotplug_driver_unlock(void) #define get_online_cpus() do { } while (0) #define put_online_cpus() do { } while (0) +#define cpu_hotplug_disable() do { } while (0) +#define cpu_hotplug_enable() do { } while (0) #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) diff --git a/include/linux/math64.h b/include/linux/math64.h index b8ba85544721..2913b86eb12a 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -6,7 +6,8 @@ #if BITS_PER_LONG == 64 -#define div64_long(x,y) div64_s64((x),(y)) +#define div64_long(x, y) div64_s64((x), (y)) +#define div64_ul(x, y) div64_u64((x), (y)) /** * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder @@ -47,7 +48,8 @@ static inline s64 div64_s64(s64 dividend, s64 divisor) #elif BITS_PER_LONG == 32 -#define div64_long(x,y) div_s64((x),(y)) +#define div64_long(x, y) div_s64((x), (y)) +#define div64_ul(x, y) div_u64((x), (y)) #ifndef div_u64_rem static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 47ead515c811..c5fd30d2a415 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -137,6 +137,7 @@ static inline void make_migration_entry_read(swp_entry_t *entry) extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); +extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte); #else #define make_migration_entry(page, write) swp_entry(0, 0) @@ -148,6 +149,8 @@ static inline int is_migration_entry(swp_entry_t swp) static inline void make_migration_entry_read(swp_entry_t *entryp) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } +static inline void migration_entry_wait_huge(struct mm_struct *mm, + pte_t *pte) { } static inline int is_write_migration_entry(swp_entry_t entry) { return 0; diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 38911391a139..98a3153c0f96 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -44,8 +44,8 @@ /* Return size of the log buffer */ #define SYSLOG_ACTION_SIZE_BUFFER 10 -#define SYSLOG_FROM_CALL 0 -#define SYSLOG_FROM_FILE 1 +#define SYSLOG_FROM_READER 0 +#define SYSLOG_FROM_PROC 1 int do_syslog(int type, char __user *buf, int count, bool from_file); diff --git a/kernel/audit.c b/kernel/audit.c index 21c7fa615bd3..91e53d04b6a9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1056,7 +1056,7 @@ static inline void audit_get_stamp(struct audit_context *ctx, static void wait_for_auditd(unsigned long sleep_time) { DECLARE_WAITQUEUE(wait, current); - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&audit_backlog_wait, &wait); if (audit_backlog_limit && diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index a291aa23fb3f..43c307dc9453 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -658,6 +658,7 @@ int audit_add_tree_rule(struct audit_krule *rule) struct vfsmount *mnt; int err; + rule->tree = NULL; list_for_each_entry(tree, &tree_list, list) { if (!strcmp(seed->pathname, tree->pathname)) { put_tree(seed); diff --git a/kernel/cpu.c b/kernel/cpu.c index b5e4ab2d427e..198a38883e64 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -133,6 +133,27 @@ static void cpu_hotplug_done(void) mutex_unlock(&cpu_hotplug.lock); } +/* + * Wait for currently running CPU hotplug operations to complete (if any) and + * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects + * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the + * hotplug path before performing hotplug operations. So acquiring that lock + * guarantees mutual exclusion from any currently running hotplug operations. + */ +void cpu_hotplug_disable(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 1; + cpu_maps_update_done(); +} + +void cpu_hotplug_enable(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 0; + cpu_maps_update_done(); +} + #else /* #if CONFIG_HOTPLUG_CPU */ static void cpu_hotplug_begin(void) {} static void cpu_hotplug_done(void) {} @@ -541,36 +562,6 @@ static int __init alloc_frozen_cpus(void) core_initcall(alloc_frozen_cpus); /* - * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU - * hotplug when tasks are about to be frozen. Also, don't allow the freezer - * to continue until any currently running CPU hotplug operation gets - * completed. - * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the - * 'cpu_add_remove_lock'. And this same lock is also taken by the regular - * CPU hotplug path and released only after it is complete. Thus, we - * (and hence the freezer) will block here until any currently running CPU - * hotplug operation gets completed. - */ -void cpu_hotplug_disable_before_freeze(void) -{ - cpu_maps_update_begin(); - cpu_hotplug_disabled = 1; - cpu_maps_update_done(); -} - - -/* - * When tasks have been thawed, re-enable regular CPU hotplug (which had been - * disabled while beginning to freeze tasks). - */ -void cpu_hotplug_enable_after_thaw(void) -{ - cpu_maps_update_begin(); - cpu_hotplug_disabled = 0; - cpu_maps_update_done(); -} - -/* * When callbacks for CPU hotplug notifications are being executed, we must * ensure that the state of the system with respect to the tasks being frozen * or not, as reported by the notification, remains unchanged *throughout the @@ -589,12 +580,12 @@ cpu_hotplug_pm_callback(struct notifier_block *nb, case PM_SUSPEND_PREPARE: case PM_HIBERNATION_PREPARE: - cpu_hotplug_disable_before_freeze(); + cpu_hotplug_disable(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: - cpu_hotplug_enable_after_thaw(); + cpu_hotplug_enable(); break; default: diff --git a/kernel/printk.c b/kernel/printk.c index fa36e1494420..8212c1aef125 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -363,6 +363,53 @@ static void log_store(int facility, int level, log_next_seq++; } +#ifdef CONFIG_SECURITY_DMESG_RESTRICT +int dmesg_restrict = 1; +#else +int dmesg_restrict; +#endif + +static int syslog_action_restricted(int type) +{ + if (dmesg_restrict) + return 1; + /* + * Unless restricted, we allow "read all" and "get buffer size" + * for everybody. + */ + return type != SYSLOG_ACTION_READ_ALL && + type != SYSLOG_ACTION_SIZE_BUFFER; +} + +static int check_syslog_permissions(int type, bool from_file) +{ + /* + * If this is from /proc/kmsg and we've already opened it, then we've + * already done the capabilities checks at open time. + */ + if (from_file && type != SYSLOG_ACTION_OPEN) + return 0; + + if (syslog_action_restricted(type)) { + if (capable(CAP_SYSLOG)) + return 0; + /* + * For historical reasons, accept CAP_SYS_ADMIN too, with + * a warning. + */ + if (capable(CAP_SYS_ADMIN)) { + pr_warn_once("%s (%d): Attempt to access syslog with " + "CAP_SYS_ADMIN but no CAP_SYSLOG " + "(deprecated).\n", + current->comm, task_pid_nr(current)); + return 0; + } + return -EPERM; + } + return security_syslog(type); +} + + /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { u64 seq; @@ -620,7 +667,8 @@ static int devkmsg_open(struct inode *inode, struct file *file) if ((file->f_flags & O_ACCMODE) == O_WRONLY) return 0; - err = security_syslog(SYSLOG_ACTION_READ_ALL); + err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, + SYSLOG_FROM_READER); if (err) return err; @@ -813,45 +861,6 @@ static inline void boot_delay_msec(int level) } #endif -#ifdef CONFIG_SECURITY_DMESG_RESTRICT -int dmesg_restrict = 1; -#else -int dmesg_restrict; -#endif - -static int syslog_action_restricted(int type) -{ - if (dmesg_restrict) - return 1; - /* Unless restricted, we allow "read all" and "get buffer size" for everybody */ - return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; -} - -static int check_syslog_permissions(int type, bool from_file) -{ - /* - * If this is from /proc/kmsg and we've already opened it, then we've - * already done the capabilities checks at open time. - */ - if (from_file && type != SYSLOG_ACTION_OPEN) - return 0; - - if (syslog_action_restricted(type)) { - if (capable(CAP_SYSLOG)) - return 0; - /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ - if (capable(CAP_SYS_ADMIN)) { - printk_once(KERN_WARNING "%s (%d): " - "Attempt to access syslog with CAP_SYS_ADMIN " - "but no CAP_SYSLOG (deprecated).\n", - current->comm, task_pid_nr(current)); - return 0; - } - return -EPERM; - } - return 0; -} - #if defined(CONFIG_PRINTK_TIME) static bool printk_time = 1; #else @@ -1249,7 +1258,7 @@ out: SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) { - return do_syslog(type, buf, len, SYSLOG_FROM_CALL); + return do_syslog(type, buf, len, SYSLOG_FROM_READER); } /* diff --git a/kernel/sys.c b/kernel/sys.c index b95d3c72ba21..2bbd9a73b54c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -362,6 +362,29 @@ int unregister_reboot_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_reboot_notifier); +/* Add backwards compatibility for stable trees. */ +#ifndef PF_NO_SETAFFINITY +#define PF_NO_SETAFFINITY PF_THREAD_BOUND +#endif + +static void migrate_to_reboot_cpu(void) +{ + /* The boot cpu is always logical cpu 0 */ + int cpu = 0; + + cpu_hotplug_disable(); + + /* Make certain the cpu I'm about to reboot on is online */ + if (!cpu_online(cpu)) + cpu = cpumask_first(cpu_online_mask); + + /* Prevent races with other tasks migrating this task */ + current->flags |= PF_NO_SETAFFINITY; + + /* Make certain I only run on the appropriate processor */ + set_cpus_allowed_ptr(current, cpumask_of(cpu)); +} + /** * kernel_restart - reboot the system * @cmd: pointer to buffer containing command to execute for restart @@ -373,7 +396,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); - disable_nonboot_cpus(); + migrate_to_reboot_cpu(); syscore_shutdown(); if (!cmd) printk(KERN_EMERG "Restarting system.\n"); @@ -400,7 +423,7 @@ static void kernel_shutdown_prepare(enum system_states state) void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); - disable_nonboot_cpus(); + migrate_to_reboot_cpu(); syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); @@ -419,7 +442,7 @@ void kernel_power_off(void) kernel_shutdown_prepare(SYSTEM_POWER_OFF); if (pm_power_off_prepare) pm_power_off_prepare(); - disable_nonboot_cpus(); + migrate_to_reboot_cpu(); syscore_shutdown(); printk(KERN_EMERG "Power down.\n"); kmsg_dump(KMSG_DUMP_POWEROFF); diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 5f9c44cdf1f5..4cc6442733f4 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -37,7 +37,7 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) mpi_limb_t a; MPI val = NULL; - while (nbytes >= 0 && buffer[0] == 0) { + while (nbytes > 0 && buffer[0] == 0) { buffer++; nbytes--; } diff --git a/mm/frontswap.c b/mm/frontswap.c index 538367ef1372..1b24bdcb3197 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -319,7 +319,7 @@ void __frontswap_invalidate_area(unsigned type) return; frontswap_ops->invalidate_area(type); atomic_set(&sis->frontswap_pages, 0); - memset(sis->frontswap_map, 0, sis->max / sizeof(long)); + bitmap_zero(sis->frontswap_map, sis->max); } clear_bit(type, need_init); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f8feeeca6686..e2bfbf73a551 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2839,7 +2839,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (ptep) { entry = huge_ptep_get(ptep); if (unlikely(is_hugetlb_entry_migration(entry))) { - migration_entry_wait(mm, (pmd_t *)ptep, address); + migration_entry_wait_huge(mm, ptep); return 0; } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) return VM_FAULT_HWPOISON_LARGE | diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 010d6c14129a..194721839cf5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1199,7 +1199,6 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, mz = mem_cgroup_zoneinfo(root, nid, zid); iter = &mz->reclaim_iter[reclaim->priority]; - last_visited = iter->last_visited; if (prev && reclaim->generation != iter->generation) { iter->last_visited = NULL; goto out_unlock; @@ -1218,13 +1217,12 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, * is alive. */ dead_count = atomic_read(&root->dead_count); - smp_rmb(); - last_visited = iter->last_visited; - if (last_visited) { - if ((dead_count != iter->last_dead_count) || - !css_tryget(&last_visited->css)) { + if (dead_count == iter->last_dead_count) { + smp_rmb(); + last_visited = iter->last_visited; + if (last_visited && + !css_tryget(&last_visited->css)) last_visited = NULL; - } } } @@ -3141,8 +3139,6 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) return -ENOMEM; } - INIT_WORK(&s->memcg_params->destroy, - kmem_cache_destroy_work_func); s->memcg_params->is_root_cache = true; /* diff --git a/mm/migrate.c b/mm/migrate.c index b1f57501de9c..6f0c24438bba 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -200,15 +200,14 @@ static void remove_migration_ptes(struct page *old, struct page *new) * get to the page and wait until migration is finished. * When we return from this function the fault will be retried. */ -void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, - unsigned long address) +static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl) { - pte_t *ptep, pte; - spinlock_t *ptl; + pte_t pte; swp_entry_t entry; struct page *page; - ptep = pte_offset_map_lock(mm, pmd, address, &ptl); + spin_lock(ptl); pte = *ptep; if (!is_swap_pte(pte)) goto out; @@ -236,6 +235,20 @@ out: pte_unmap_unlock(ptep, ptl); } +void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, + unsigned long address) +{ + spinlock_t *ptl = pte_lockptr(mm, pmd); + pte_t *ptep = pte_offset_map(pmd, address); + __migration_entry_wait(mm, ptep, ptl); +} + +void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte) +{ + spinlock_t *ptl = &(mm)->page_table_lock; + __migration_entry_wait(mm, pte, ptl); +} + #ifdef CONFIG_BLOCK /* Returns true if all buffers are successfully locked */ static bool buffer_migrate_lock_buffers(struct buffer_head *head, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 378a15bcd649..c3edb624fccf 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1628,6 +1628,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, long min = mark; long lowmem_reserve = z->lowmem_reserve[classzone_idx]; int o; + long free_cma = 0; free_pages -= (1 << order) - 1; if (alloc_flags & ALLOC_HIGH) @@ -1637,9 +1638,10 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, #ifdef CONFIG_CMA /* If allocation can't use CMA areas don't use free CMA pages */ if (!(alloc_flags & ALLOC_CMA)) - free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES); + free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); #endif - if (free_pages <= min + lowmem_reserve) + + if (free_pages - free_cma <= min + lowmem_reserve) return false; for (o = 0; o < order; o++) { /* At the next order, this order's pages become unavailable */ diff --git a/mm/swap_state.c b/mm/swap_state.c index b3d40dcf3624..f24ab0dff554 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -336,8 +336,24 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * Swap entry may have been freed since our caller observed it. */ err = swapcache_prepare(entry); - if (err == -EEXIST) { /* seems racy */ + if (err == -EEXIST) { radix_tree_preload_end(); + /* + * We might race against get_swap_page() and stumble + * across a SWAP_HAS_CACHE swap_map entry whose page + * has not been brought into the swapcache yet, while + * the other end is scheduled away waiting on discard + * I/O completion at scan_swap_map(). + * + * In order to avoid turning this transitory state + * into a permanent loop around this -EEXIST case + * if !CONFIG_PREEMPT and the I/O completion happens + * to be waiting on the CPU waitqueue where we are now + * busy looping, we just conditionally invoke the + * scheduler here, if there are some more important + * tasks to run. + */ + cond_resched(); continue; } if (err) { /* swp entry is obsolete ? */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 6c340d908b27..746af55b8455 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2116,7 +2116,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) } /* frontswap enabled? set up bit-per-page map for frontswap */ if (frontswap_enabled) - frontswap_map = vzalloc(maxpages / sizeof(long)); + frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long)); if (p->bdev) { if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { |