From e050e3f0a71bf7dc2c148b35caff0234decc8198 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 26 Jan 2012 17:03:19 +0100 Subject: perf: Fix broken interrupt rate throttling This patch fixes the sampling interrupt throttling mechanism. It was broken in v3.2. Events were not being unthrottled. The unthrottling mechanism required that events be checked at each timer tick. This patch solves this problem and also separates: - unthrottling - multiplexing - frequency-mode period adjustments Not all of them need to be executed at each timer tick. This third version of the patch is based on my original patch + PeterZ proposal (https://lkml.org/lkml/2012/1/7/87). At each timer tick, for each context: - if the current CPU has throttled events, we unthrottle events - if context has frequency-based events, we adjust sampling periods - if we have reached the jiffies interval, we multiplex (rotate) We decoupled rotation (multiplexing) from frequency-mode sampling period adjustments. They should not necessarily happen at the same rate. Multiplexing is subject to jiffies_interval (currently at 1 but could be higher once the tunable is exposed via sysfs). We have grouped frequency-mode adjustment and unthrottling into the same routine to minimize code duplication. When throttled while in frequency mode, we scan the events only once. We have fixed the threshold enforcement code in __perf_event_overflow(). There was a bug whereby it would allow more than the authorized rate because an increment of hwc->interrupts was not executed at the right place. The patch was tested with low sampling limit (2000) and fixed periods, frequency mode, overcommitted PMU. On a 2.1GHz AMD CPU: $ cat /proc/sys/kernel/perf_event_max_sample_rate 2000 We set a rate of 3000 samples/sec (2.1GHz/3000 = 700000): $ perf record -e cycles,cycles -c 700000 noploop 10 $ perf report -D | tail -21 Aggregated stats: TOTAL events: 80086 MMAP events: 88 COMM events: 2 EXIT events: 4 THROTTLE events: 19996 UNTHROTTLE events: 19996 SAMPLE events: 40000 cycles stats: TOTAL events: 40006 MMAP events: 5 COMM events: 1 EXIT events: 4 THROTTLE events: 9998 UNTHROTTLE events: 9998 SAMPLE events: 20000 cycles stats: TOTAL events: 39996 THROTTLE events: 9998 UNTHROTTLE events: 9998 SAMPLE events: 20000 For 10s, the cap is 2x2000x10 = 40000 samples. We get exactly that: 20000 samples/event. Signed-off-by: Stephane Eranian Cc: # v3.2+ Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120126160319.GA5655@quad Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 08855613ceb3..abb2776be1ba 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -587,6 +587,7 @@ struct hw_perf_event { u64 sample_period; u64 last_period; local64_t period_left; + u64 interrupts_seq; u64 interrupts; u64 freq_time_stamp; -- cgit v1.2.3 From 1a30871fe635d3e92972e6b93e39ff65bb57e52d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 16 Jan 2012 11:07:16 +0200 Subject: mtd: fix MTD suspend Commits 3fe4bae88460869a8e553397cd9057a4ee7ca341 and 079c985e7a6f4ce60f931cebfdd5ee3c3 broke MTD suspend in 2 ways: 1. When the '->suspend' method is not present, we return -EOPNOTSUPP, but the callers of 'mtd_suspend()' expects 0 instead. 2. Checking of the 'mtd' parameter against NULL has been incorrectly removed in 'mtd_cls_suspend()'. This patch fixes the breakages. This has been found, analyzed, reported and tested by Rafael J. Wysocki . Note, this patch is not needed in the stable tree because it causes a regression introduced during the v3.3 merge window. Reported-by: Rafael J. Wysocki Tested-by: Rafael J. Wysocki Tested-by: Russell King Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdcore.c | 2 +- include/linux/mtd/mtd.h | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 6ae9ca01388b..9a9ce71a71fc 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -119,7 +119,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state) { struct mtd_info *mtd = dev_get_drvdata(dev); - return mtd_suspend(mtd); + return mtd ? mtd_suspend(mtd) : 0; } static int mtd_cls_resume(struct device *dev) diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 1a81fde8f333..d8c7aad7331c 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -427,9 +427,7 @@ static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) static inline int mtd_suspend(struct mtd_info *mtd) { - if (!mtd->suspend) - return -EOPNOTSUPP; - return mtd->suspend(mtd); + return mtd->suspend ? mtd->suspend(mtd) : 0; } static inline void mtd_resume(struct mtd_info *mtd) -- cgit v1.2.3 From 3cccd1543ab623a5065335bf08350e06ffc788ab Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 26 Jan 2012 19:13:16 +0200 Subject: lib/mpi: replaced MPI_NULL with normal NULL MPI_NULL is replaced with normal NULL. Signed-off-by: Dmitry Kasatkin Reviewed-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/mpi.h | 2 -- lib/mpi/mpicoder.c | 8 ++++---- lib/mpi/mpiutil.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 06f88994ccaa..d02cca6cc8ce 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -57,8 +57,6 @@ struct gcry_mpi { typedef struct gcry_mpi *MPI; -#define MPI_NULL NULL - #define mpi_get_nlimbs(a) ((a)->nlimbs) #define mpi_is_neg(a) ((a)->sign) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 6116fc4990da..d7684aa7f65c 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -34,7 +34,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits) uint8_t *frame, *fr_pt; int i = 0, n; size_t asnlen = DIM(asn); - MPI a = MPI_NULL; + MPI a = NULL; if (SHA1_DIGEST_LENGTH + asnlen + 4 > nframe) pr_info("MPI: can't encode a %d bit MD into a %d bits frame\n", @@ -48,7 +48,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits) */ frame = kmalloc(nframe, GFP_KERNEL); if (!frame) - return MPI_NULL; + return NULL; n = 0; frame[n++] = 0; frame[n++] = 1; /* block type */ @@ -92,7 +92,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) int i, j; unsigned nbits, nbytes, nlimbs, nread = 0; mpi_limb_t a; - MPI val = MPI_NULL; + MPI val = NULL; if (*ret_nread < 2) goto leave; @@ -109,7 +109,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; val = mpi_alloc(nlimbs); if (!val) - return MPI_NULL; + return NULL; i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; i %= BYTES_PER_MPI_LIMB; val->nbits = nbits; diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index eefc55d6b7f5..6bfc41f62b8f 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -135,7 +135,7 @@ int mpi_copy(MPI *copied, const MPI a) size_t i; MPI b; - *copied = MPI_NULL; + *copied = NULL; if (a) { b = mpi_alloc(a->nlimbs); -- cgit v1.2.3 From e9c8d7a03e69093e4c33c5056a45c1233a42e8a4 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 18 Jan 2012 10:14:25 +0100 Subject: dma: sh_dma: not all SH DMAC implementations support MEMCPY Add a flag to allow platforms to specify, whether a DMAC instance supports the MEMCPY operation. To avoid regressions, preserve the current default. Signed-off-by: Guennadi Liakhovetski Acked-by: Paul Mundt Signed-off-by: Vinod Koul --- drivers/dma/shdma.c | 3 ++- include/linux/sh_dma.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 54043cd831c8..812fd76e9c18 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -1262,7 +1262,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev) INIT_LIST_HEAD(&shdev->common.channels); - dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask); + if (!pdata->slave_only) + dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask); if (pdata->slave && pdata->slave_num) dma_cap_set(DMA_SLAVE, shdev->common.cap_mask); diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h index 8cd7fe59cf1a..425450b980b8 100644 --- a/include/linux/sh_dma.h +++ b/include/linux/sh_dma.h @@ -70,6 +70,7 @@ struct sh_dmae_pdata { unsigned int needs_tend_set:1; unsigned int no_dmars:1; unsigned int chclr_present:1; + unsigned int slave_only:1; }; /* DMA register */ -- cgit v1.2.3 From b18db3d91234c03ad080d317878c7c77672ba326 Mon Sep 17 00:00:00 2001 From: Heiko Stübner Date: Wed, 1 Feb 2012 09:12:24 -0800 Subject: Input: gpio_keys - fix struct device declared inside parameter list A struct device parameter is used in the enable and disable callbacks to distinguish between different gpio_keys devices. Platforms that don't use these callbacks may not include struct device at all, as seen on arch/arm/mach-s3c2410/mach-n30.c Signed-off-by: Heiko Stuebner Signed-off-by: Dmitry Torokhov --- include/linux/gpio_keys.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index b5ca4b2c08ec..004ff33ab38e 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -1,6 +1,8 @@ #ifndef _GPIO_KEYS_H #define _GPIO_KEYS_H +struct device; + struct gpio_keys_button { /* Configuration parameters */ unsigned int code; /* input event code (KEY_*, SW_*) */ -- cgit v1.2.3 From 7d731019218e49a9811f6d0adec4b1cfcb752bed Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 1 Feb 2012 11:10:24 -0800 Subject: mtd: fix merge conflict resolution breakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes merge conflict resolution breakage introduced by merge d3712b9dfcf4 ("Merge tag 'for-linus' of git://github.com/prasad-joshi/logfs_upstream"). The commit changed 'mtd_can_have_bb()' function and made it always return zero, which is incorrect. Instead, we need it to return whether the underlying flash device can have bad eraseblocks or not. UBI needs this information because it affects how it handles the underlying flash. E.g., if the underlying flash is NOR, it cannot have bad blocks and any write or erase error is fatal, and all we can do is to switch to R/O mode. We do not need to reserve a pool of good eraseblocks for bad eraseblocks handling, and so on. This patch also removes 'mtd_can_have_bb()' invocations from Logfs to ensure correct Logfs behavior. I've tested that with this patch UBI works on top of NOR and NAND flashes emulated by mtdram and nandsim correspondingly. This patch is based on patch from Linus Torvalds. Signed-off-by: Artem Bityutskiy Acked-by: Jörn Engel Acked-by: Prasad Joshi Acked-by: Brian Norris Signed-off-by: Linus Torvalds --- fs/logfs/dev_mtd.c | 6 ------ include/linux/mtd/mtd.h | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index e97404d611e0..9c501449450d 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = 0; while (mtd_block_isbad(mtd, *ofs)) { *ofs += mtd->erasesize; @@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = mtd->size - mtd->erasesize; while (mtd_block_isbad(mtd, *ofs)) { *ofs -= mtd->erasesize; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 221295208fd0..887ebe318c75 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -489,7 +489,7 @@ static inline int mtd_has_oob(const struct mtd_info *mtd) static inline int mtd_can_have_bb(const struct mtd_info *mtd) { - return 0; + return !!mtd->block_isbad; } /* Kernel-side ioctl definitions */ -- cgit v1.2.3 From 8cdb878dcb359fd1137e9abdee9322f5e9bcfdf8 Mon Sep 17 00:00:00 2001 From: Christopher Yeoh Date: Thu, 2 Feb 2012 11:34:09 +1030 Subject: Fix race in process_vm_rw_core This fixes the race in process_vm_core found by Oleg (see http://article.gmane.org/gmane.linux.kernel/1235667/ for details). This has been updated since I last sent it as the creation of the new mm_access() function did almost exactly the same thing as parts of the previous version of this patch did. In order to use mm_access() even when /proc isn't enabled, we move it to kernel/fork.c where other related process mm access functions already are. Signed-off-by: Chris Yeoh Signed-off-by: Linus Torvalds --- fs/proc/base.c | 20 -------------------- include/linux/sched.h | 6 ++++++ kernel/fork.c | 20 ++++++++++++++++++++ mm/process_vm_access.c | 23 +++++++++-------------- 4 files changed, 35 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index d9512bd03e6c..d4548dd49b02 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) -{ - struct mm_struct *mm; - int err; - - err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return ERR_PTR(err); - - mm = get_task_mm(task); - if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { - mmput(mm); - mm = ERR_PTR(-EACCES); - } - mutex_unlock(&task->signal->cred_guard_mutex); - - return mm; -} - struct mm_struct *mm_for_maps(struct task_struct *task) { return mm_access(task, PTRACE_MODE_READ); diff --git a/include/linux/sched.h b/include/linux/sched.h index 2234985a5e65..7d379a6bfd88 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2259,6 +2259,12 @@ static inline void mmdrop(struct mm_struct * mm) extern void mmput(struct mm_struct *); /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); /* Allocate a new mm structure and copy contents from tsk->mm */ diff --git a/kernel/fork.c b/kernel/fork.c index 051f090d40c1..1b2ef3c23ae4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -647,6 +647,26 @@ struct mm_struct *get_task_mm(struct task_struct *task) } EXPORT_SYMBOL_GPL(get_task_mm); +struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) +{ + struct mm_struct *mm; + int err; + + err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return ERR_PTR(err); + + mm = get_task_mm(task); + if (mm && mm != current->mm && + !ptrace_may_access(task, mode)) { + mmput(mm); + mm = ERR_PTR(-EACCES); + } + mutex_unlock(&task->signal->cred_guard_mutex); + + return mm; +} + /* Please note the differences between mmput and mm_release. * mmput is called whenever we stop holding onto a mm_struct, * error success whatever. diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index e920aa3ce104..c20ff48994c2 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -298,23 +298,18 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec, goto free_proc_pages; } - task_lock(task); - if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) { - task_unlock(task); - rc = -EPERM; - goto put_task_struct; - } - mm = task->mm; - - if (!mm || (task->flags & PF_KTHREAD)) { - task_unlock(task); - rc = -EINVAL; + mm = mm_access(task, PTRACE_MODE_ATTACH); + if (!mm || IS_ERR(mm)) { + rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; + /* + * Explicitly map EACCES to EPERM as EPERM is a more a + * appropriate error code for process_vw_readv/writev + */ + if (rc == -EACCES) + rc = -EPERM; goto put_task_struct; } - atomic_inc(&mm->mm_users); - task_unlock(task); - for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) { rc = process_vm_rw_single_vec( (unsigned long)rvec[i].iov_base, rvec[i].iov_len, -- cgit v1.2.3 From ff05f603c3238010769787f3ba54c48c290ed3e5 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 2 Feb 2012 15:29:08 -0800 Subject: include/linux/lp8727.h: Remove executable bit Signed-off-by: Josh Triplett Signed-off-by: Linus Torvalds --- include/linux/lp8727.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 include/linux/lp8727.h (limited to 'include/linux') diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h old mode 100755 new mode 100644 -- cgit v1.2.3 From f8447d6c213273b444c81eaa2449f55510229d4f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sat, 14 Jan 2012 20:58:43 +0100 Subject: mfd: Store twl6040-codec mclk configuration Store the last used mclk configuration for the PLL. Signed-off-by: Peter Ujfalusi Signed-off-by: Samuel Ortiz --- drivers/mfd/twl6040-core.c | 3 +++ include/linux/mfd/twl6040.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c index dda86293dc9f..c2088f4c4547 100644 --- a/drivers/mfd/twl6040-core.c +++ b/drivers/mfd/twl6040-core.c @@ -282,6 +282,7 @@ int twl6040_power(struct twl6040 *twl6040, int on) /* Default PLL configuration after power up */ twl6040->pll = TWL6040_SYSCLK_SEL_LPPLL; twl6040->sysclk = 19200000; + twl6040->mclk = 32768; } else { /* already powered-down */ if (!twl6040->power_count) { @@ -305,6 +306,7 @@ int twl6040_power(struct twl6040 *twl6040, int on) twl6040_power_down(twl6040); } twl6040->sysclk = 0; + twl6040->mclk = 0; } out: @@ -421,6 +423,7 @@ int twl6040_set_pll(struct twl6040 *twl6040, int pll_id, } twl6040->sysclk = freq_out; + twl6040->mclk = freq_in; twl6040->pll = pll_id; pll_out: diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h index 2463c2619596..9bc9ac651dad 100644 --- a/include/linux/mfd/twl6040.h +++ b/include/linux/mfd/twl6040.h @@ -187,8 +187,10 @@ struct twl6040 { int rev; u8 vibra_ctrl_cache[2]; + /* PLL configuration */ int pll; unsigned int sysclk; + unsigned int mclk; unsigned int irq; unsigned int irq_base; -- cgit v1.2.3 From d020283dc694c9ec31b410f522252f7a8397e67d Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 3 Feb 2012 22:22:25 +0100 Subject: PM / QoS: CPU C-state breakage with PM Qos change Looks like change "PM QoS: Move and rename the implementation files" merged during the 3.2 development cycle made PM QoS depend on CONFIG_PM which depends on (PM_SLEEP || PM_RUNTIME). That breaks CPU C-states with kernels not having these CONFIGs, causing CPUs to spend time in Polling loop idle instead of going into deep C-states, consuming way way more power. This is with either acpi idle or intel idle enabled. Either CONFIG_PM should be enabled with any pm_qos users or the !CONFIG_PM pm_qos_request() should return sane defaults not to break the existing users. Here's is the patch for the latter option. [rjw: Modified the changelog slightly.] Signed-off-by: Venkatesh Pallipadi Signed-off-by: Rafael J. Wysocki Cc: stable@vger.kernel.org --- include/linux/pm_qos.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index e5bbcbaa6f57..4d99e4e6ef83 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -110,7 +110,19 @@ static inline void pm_qos_remove_request(struct pm_qos_request *req) { return; } static inline int pm_qos_request(int pm_qos_class) - { return 0; } +{ + switch (pm_qos_class) { + case PM_QOS_CPU_DMA_LATENCY: + return PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE; + case PM_QOS_NETWORK_LATENCY: + return PM_QOS_NETWORK_LAT_DEFAULT_VALUE; + case PM_QOS_NETWORK_THROUGHPUT: + return PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE; + default: + return PM_QOS_DEFAULT_VALUE; + } +} + static inline int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { return 0; } -- cgit v1.2.3 From 96e02d1586782eadf051fa3d6bc4132d2447ac2c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 4 Feb 2012 10:47:10 +0100 Subject: exec: fix use-after-free bug in setup_new_exec() Setting the task name is done within setup_new_exec() by accessing bprm->filename. However this happens after flush_old_exec(). This may result in a use after free bug, flush_old_exec() may "complete" vfork_done, which will wake up the parent which in turn may free the passed in filename. To fix this add a new tcomm field in struct linux_binprm which contains the now early generated task name until it is used. Fixes this bug on s390: Unable to handle kernel pointer dereference at virtual kernel address 0000000039768000 Process kworker/u:3 (pid: 245, task: 000000003a3dc840, ksp: 0000000039453818) Krnl PSW : 0704000180000000 0000000000282e94 (setup_new_exec+0xa0/0x374) Call Trace: ([<0000000000282e2c>] setup_new_exec+0x38/0x374) [<00000000002dd12e>] load_elf_binary+0x402/0x1bf4 [<0000000000280a42>] search_binary_handler+0x38e/0x5bc [<0000000000282b6c>] do_execve_common+0x410/0x514 [<0000000000282cb6>] do_execve+0x46/0x58 [<00000000005bce58>] kernel_execve+0x28/0x70 [<000000000014ba2e>] ____call_usermodehelper+0x102/0x140 [<00000000005bc8da>] kernel_thread_starter+0x6/0xc [<00000000005bc8d4>] kernel_thread_starter+0x0/0xc Last Breaking-Event-Address: [<00000000002830f0>] setup_new_exec+0x2fc/0x374 Kernel panic - not syncing: Fatal exception: panic_on_oops Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- fs/exec.c | 33 +++++++++++++++++---------------- include/linux/binfmts.h | 3 ++- 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index aeb135c7ff5c..92ce83a11e90 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf) perf_event_comm(tsk); } +static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len) +{ + int i, ch; + + /* Copies the binary name from after last slash */ + for (i = 0; (ch = *(fn++)) != '\0';) { + if (ch == '/') + i = 0; /* overwrite what we wrote */ + else + if (i < len - 1) + tcomm[i++] = ch; + } + tcomm[i] = '\0'; +} + int flush_old_exec(struct linux_binprm * bprm) { int retval; @@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm) set_mm_exe_file(bprm->mm, bprm->file); + filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm)); /* * Release all of the old mmap stuff */ @@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump); void setup_new_exec(struct linux_binprm * bprm) { - int i, ch; - const char *name; - char tcomm[sizeof(current->comm)]; - arch_pick_mmap_layout(current->mm); /* This is the point of no return */ @@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - name = bprm->filename; - - /* Copies the binary name from after last slash */ - for (i=0; (ch = *(name++)) != '\0';) { - if (ch == '/') - i = 0; /* overwrite what we wrote */ - else - if (i < (sizeof(tcomm) - 1)) - tcomm[i++] = ch; - } - tcomm[i] = '\0'; - set_task_comm(current, tcomm); + set_task_comm(current, bprm->tcomm); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index fd88a3945aa1..0092102db2de 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -18,7 +18,7 @@ struct pt_regs; #define BINPRM_BUF_SIZE 128 #ifdef __KERNEL__ -#include +#include #define CORENAME_MAX_SIZE 128 @@ -58,6 +58,7 @@ struct linux_binprm { unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; + char tcomm[TASK_COMM_LEN]; }; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 -- cgit v1.2.3