From 0151e3d6d9d72df8f823c3f991a14ba63fb13f28 Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Tue, 12 Nov 2013 15:07:34 -0800 Subject: Documentation/vm/zswap.txt: fix typos Signed-off-by: Christian Hesse Acked-by: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/zswap.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt index 7e492d8aaeaf..00c3d31e7971 100644 --- a/Documentation/vm/zswap.txt +++ b/Documentation/vm/zswap.txt @@ -8,7 +8,7 @@ significant performance improvement if reads from the compressed cache are faster than reads from a swap device. NOTE: Zswap is a new feature as of v3.11 and interacts heavily with memory -reclaim. This interaction has not be fully explored on the large set of +reclaim. This interaction has not been fully explored on the large set of potential configurations and workloads that exist. For this reason, zswap is a work in progress and should be considered experimental. @@ -23,7 +23,7 @@ Some potential benefits:     drastically reducing life-shortening writes. Zswap evicts pages from compressed cache on an LRU basis to the backing swap -device when the compressed pool reaches it size limit. This requirement had +device when the compressed pool reaches its size limit. This requirement had been identified in prior community discussions. To enabled zswap, the "enabled" attribute must be set to 1 at boot time. e.g. @@ -37,7 +37,7 @@ the backing swap device in the case that the compressed pool is full. Zswap makes use of zbud for the managing the compressed memory pool. Each allocation in zbud is not directly accessible by address. Rather, a handle is -return by the allocation routine and that handle must be mapped before being +returned by the allocation routine and that handle must be mapped before being accessed. The compressed memory pool grows on demand and shrinks as compressed pages are freed. The pool is not preallocated. @@ -56,7 +56,7 @@ in the swap_map goes to 0) the swap code calls the zswap invalidate function, via frontswap, to free the compressed entry. Zswap seeks to be simple in its policies. Sysfs attributes allow for one user -controlled policies: +controlled policy: * max_pool_percent - The maximum percentage of memory that the compressed pool can occupy. -- cgit v1.2.3 From 071aee138410210e3764f3ae8d37ef46dc6d3b42 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 12 Nov 2013 15:07:41 -0800 Subject: memcg: support hierarchical memory.numa_stats The memory.numa_stat file was not hierarchical. Memory charged to the children was not shown in parent's numa_stat. This change adds the "hierarchical_" stats to the existing stats. The new hierarchical stats include the sum of all children's values in addition to the value of the memcg. Tested: Create cgroup a, a/b and run workload under b. The values of b are included in the "hierarchical_*" under a. $ cd /sys/fs/cgroup $ echo 1 > memory.use_hierarchy $ mkdir a a/b Run workload in a/b: $ (echo $BASHPID >> a/b/cgroup.procs && cat /some/file && bash) & The hierarchical_ fields in parent (a) show use of workload in a/b: $ cat a/memory.numa_stat total=0 N0=0 N1=0 N2=0 N3=0 file=0 N0=0 N1=0 N2=0 N3=0 anon=0 N0=0 N1=0 N2=0 N3=0 unevictable=0 N0=0 N1=0 N2=0 N3=0 hierarchical_total=908 N0=552 N1=317 N2=39 N3=0 hierarchical_file=850 N0=549 N1=301 N2=0 N3=0 hierarchical_anon=58 N0=3 N1=16 N2=39 N3=0 hierarchical_unevictable=0 N0=0 N1=0 N2=0 N3=0 $ cat a/b/memory.numa_stat total=908 N0=552 N1=317 N2=39 N3=0 file=850 N0=549 N1=301 N2=0 N3=0 anon=58 N0=3 N1=16 N2=39 N3=0 unevictable=0 N0=0 N1=0 N2=0 N3=0 hierarchical_total=908 N0=552 N1=317 N2=39 N3=0 hierarchical_file=850 N0=549 N1=301 N2=0 N3=0 hierarchical_anon=58 N0=3 N1=16 N2=39 N3=0 hierarchical_unevictable=0 N0=0 N1=0 N2=0 N3=0 Signed-off-by: Ying Han Signed-off-by: Greg Thelen Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memory.txt | 10 +++++++--- mm/memcontrol.c | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 8af4ad121828..e2bc132608fd 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -573,15 +573,19 @@ an memcg since the pages are allowed to be allocated from any physical node. One of the use cases is evaluating application performance by combining this information with the application's CPU allocation. -We export "total", "file", "anon" and "unevictable" pages per-node for -each memcg. The ouput format of memory.numa_stat is: +Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable" +per-node page counts including "hierarchical_" which sums up all +hierarchical children's values in addition to the memcg's own value. + +The ouput format of memory.numa_stat is: total= N0= N1= ... file= N0= N1= ... anon= N0= N1= ... unevictable= N0= N1= ... +hierarchical_= N0= N1= ... -And we have total = file + anon + unevictable. +The "total" count is sum of file + anon + unevictable. 6. Hierarchy support diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 91b5d3a62059..c89072443166 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5396,6 +5396,23 @@ static int memcg_numa_stat_show(struct cgroup_subsys_state *css, seq_putc(m, '\n'); } + for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { + struct mem_cgroup *iter; + + nr = 0; + for_each_mem_cgroup_tree(iter, memcg) + nr += mem_cgroup_nr_lru_pages(iter, stat->lru_mask); + seq_printf(m, "hierarchical_%s=%lu", stat->name, nr); + for_each_node_state(nid, N_MEMORY) { + nr = 0; + for_each_mem_cgroup_tree(iter, memcg) + nr += mem_cgroup_node_nr_lru_pages( + iter, nid, stat->lru_mask); + seq_printf(m, " N%d=%lu", nid, nr); + } + seq_putc(m, '\n'); + } + return 0; } #endif /* CONFIG_NUMA */ -- cgit v1.2.3 From ec8e41aec13005fed0dbee002fb8c99b4e001d50 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 12 Nov 2013 15:07:49 -0800 Subject: /proc/pid/smaps: show VM_SOFTDIRTY flag in VmFlags line This flag shows that the VMA is "newly created" and thus represents "dirty" in the task's VM. You can clear it by "echo 4 > /proc/pid/clear_refs." Signed-off-by: Naoya Horiguchi Cc: Wu Fengguang Cc: Pavel Emelyanov Acked-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 1 + fs/proc/task_mmu.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 823c95faebd2..22d89aa37218 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -460,6 +460,7 @@ manner. The codes are the following: nl - non-linear mapping ar - architecture specific flag dd - do not include area into core dump + sd - soft-dirty flag mm - mixed map area hg - huge page advise flag nh - no-huge page advise flag diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9f1369fe0afb..abbe825d20ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -561,6 +561,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_NONLINEAR)] = "nl", [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_DONTDUMP)] = "dd", +#ifdef CONFIG_MEM_SOFT_DIRTY + [ilog2(VM_SOFTDIRTY)] = "sd", +#endif [ilog2(VM_MIXEDMAP)] = "mm", [ilog2(VM_HUGEPAGE)] = "hg", [ilog2(VM_NOHUGEPAGE)] = "nh", -- cgit v1.2.3 From c5320926e370b4cfb8f10c2169e26f960079cf67 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 12 Nov 2013 15:08:10 -0800 Subject: mem-hotplug: introduce movable_node boot option The hot-Pluggable field in SRAT specifies which memory is hotpluggable. As we mentioned before, if hotpluggable memory is used by the kernel, it cannot be hot-removed. So memory hotplug users may want to set all hotpluggable memory in ZONE_MOVABLE so that the kernel won't use it. Memory hotplug users may also set a node as movable node, which has ZONE_MOVABLE only, so that the whole node can be hot-removed. But the kernel cannot use memory in ZONE_MOVABLE. By doing this, the kernel cannot use memory in movable nodes. This will cause NUMA performance down. And other users may be unhappy. So we need a way to allow users to enable and disable this functionality. In this patch, we introduce movable_node boot option to allow users to choose to not to consume hotpluggable memory at early boot time and later we can set it as ZONE_MOVABLE. To achieve this, the movable_node boot option will control the memblock allocation direction. That said, after memblock is ready, before SRAT is parsed, we should allocate memory near the kernel image as we explained in the previous patches. So if movable_node boot option is set, the kernel does the following: 1. After memblock is ready, make memblock allocate memory bottom up. 2. After SRAT is parsed, make memblock behave as default, allocate memory top down. Users can specify "movable_node" in kernel commandline to enable this functionality. For those who don't use memory hotplug or who don't want to lose their NUMA performance, just don't specify anything. The kernel will work as before. Signed-off-by: Tang Chen Signed-off-by: Zhang Yanfei Suggested-by: Kamezawa Hiroyuki Suggested-by: Ingo Molnar Acked-by: Tejun Heo Acked-by: Toshi Kani Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Wanpeng Li Cc: Thomas Renninger Cc: Yinghai Lu Cc: Jiang Liu Cc: Wen Congyang Cc: Lai Jiangshan Cc: Yasuaki Ishimatsu Cc: Taku Izumi Cc: Mel Gorman Cc: Michal Nazarewicz Cc: Minchan Kim Cc: Rik van Riel Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 +++ arch/x86/mm/numa.c | 11 +++++++++++ mm/Kconfig | 17 ++++++++++++----- mm/memory_hotplug.c | 31 +++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fd3ecedc084d..882a40d405c8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1775,6 +1775,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. that the amount of memory usable for all allocations is not too small. + movable_node [KNL,X86] Boot-time switch to enable the effects + of CONFIG_MOVABLE_NODE=y. See mm/Kconfig for details. + MTD_Partition= [MTD] Format: ,,, diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 8bf93bae1f13..24aec58d6afd 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -567,6 +567,17 @@ static int __init numa_init(int (*init_func)(void)) ret = init_func(); if (ret < 0) return ret; + + /* + * We reset memblock back to the top-down direction + * here because if we configured ACPI_NUMA, we have + * parsed SRAT in init_func(). It is ok to have the + * reset here even if we did't configure ACPI_NUMA + * or acpi numa init fails and fallbacks to dummy + * numa init. + */ + memblock_set_bottom_up(false); + ret = numa_cleanup_meminfo(&numa_meminfo); if (ret < 0) return ret; diff --git a/mm/Kconfig b/mm/Kconfig index 394838f489eb..3f4ffda152bb 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -153,11 +153,18 @@ config MOVABLE_NODE help Allow a node to have only movable memory. Pages used by the kernel, such as direct mapping pages cannot be migrated. So the corresponding - memory device cannot be hotplugged. This option allows users to - online all the memory of a node as movable memory so that the whole - node can be hotplugged. Users who don't use the memory hotplug - feature are fine with this option on since they don't online memory - as movable. + memory device cannot be hotplugged. This option allows the following + two things: + - When the system is booting, node full of hotpluggable memory can + be arranged to have only movable memory so that the whole node can + be hot-removed. (need movable_node boot option specified). + - After the system is up, the option allows users to online all the + memory of a node as movable memory so that the whole node can be + hot-removed. + + Users who don't use the memory hotplug feature are fine with this + option on since they don't specify movable_node boot option or they + don't online memory as movable. Say Y here if you want to hotplug a whole node. Say N here if you want kernel to use memory on all nodes evenly. diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1b6fe8ca71e6..489f235502db 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -1422,6 +1423,36 @@ static bool can_offline_normal(struct zone *zone, unsigned long nr_pages) } #endif /* CONFIG_MOVABLE_NODE */ +static int __init cmdline_parse_movable_node(char *p) +{ +#ifdef CONFIG_MOVABLE_NODE + /* + * Memory used by the kernel cannot be hot-removed because Linux + * cannot migrate the kernel pages. When memory hotplug is + * enabled, we should prevent memblock from allocating memory + * for the kernel. + * + * ACPI SRAT records all hotpluggable memory ranges. But before + * SRAT is parsed, we don't know about it. + * + * The kernel image is loaded into memory at very early time. We + * cannot prevent this anyway. So on NUMA system, we set any + * node the kernel resides in as un-hotpluggable. + * + * Since on modern servers, one node could have double-digit + * gigabytes memory, we can assume the memory around the kernel + * image is also un-hotpluggable. So before SRAT is parsed, just + * allocate memory near the kernel image to try the best to keep + * the kernel away from hotpluggable memory. + */ + memblock_set_bottom_up(true); +#else + pr_warn("movable_node option not supported\n"); +#endif + return 0; +} +early_param("movable_node", cmdline_parse_movable_node); + /* check which state of node_states will be changed when offline memory */ static void node_states_check_changes_offline(unsigned long nr_pages, struct zone *zone, struct memory_notify *arg) -- cgit v1.2.3 From 715ea41e60277f28f84d6c937737350e00955d56 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Tue, 12 Nov 2013 15:08:30 -0800 Subject: mm: improve the description for dirty_background_ratio/dirty_ratio sysctl Now dirty_background_ratio/dirty_ratio contains a percentage of total avaiable memory, which contains free pages and reclaimable pages. The number of these pages is not equal to the number of total system memory. But they are described as a percentage of total system memory in Documentation/sysctl/vm.txt. So we need to fix them to avoid misunderstanding. Signed-off-by: Zheng Liu Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 79a797eb3e87..1fbd4eb7b64a 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -119,8 +119,11 @@ other appears as 0 when read. dirty_background_ratio -Contains, as a percentage of total system memory, the number of pages at which -the background kernel flusher threads will start writing out dirty data. +Contains, as a percentage of total available memory that contains free pages +and reclaimable pages, the number of pages at which the background kernel +flusher threads will start writing out dirty data. + +The total avaiable memory is not equal to total system memory. ============================================================== @@ -151,9 +154,11 @@ interval will be written out next time a flusher thread wakes up. dirty_ratio -Contains, as a percentage of total system memory, the number of pages at which -a process which is generating disk writes will itself start writing out dirty -data. +Contains, as a percentage of total available memory that contains free pages +and reclaimable pages, the number of pages at which a process which is +generating disk writes will itself start writing out dirty data. + +The total avaiable memory is not equal to total system memory. ============================================================== -- cgit v1.2.3 From 3d035f580699feba352f8703cced127fc203f0dd Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 12 Nov 2013 15:08:33 -0800 Subject: drivers/char/hpet.c: allow user controlled mmap for user processes The CONFIG_HPET_MMAP Kconfig option exposes the memory map of the HPET registers to userspace. The Kconfig help points out that in some cases this can be a security risk as some systems may erroneously configure the map such that additional data is exposed to userspace. This is a problem for distributions -- some users want the MMAP functionality but it comes with a significant security risk. In an effort to mitigate this risk, and due to the low number of users of the MMAP functionality, I've introduced a kernel parameter, hpet_mmap_enable, that is required in order to actually have the HPET MMAP exposed. Signed-off-by: Prarit Bhargava Acked-by: Matt Wilson Signed-off-by: Clemens Ladisch Cc: Randy Dunlap Cc: Tomas Winkler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 +++ drivers/char/Kconfig | 10 ++++++++-- drivers/char/hpet.c | 24 ++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 882a40d405c8..9ca3e74a10e1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1070,6 +1070,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. VIA, nVidia) verbose: show contents of HPET registers during setup + hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET + registers. Default set by CONFIG_HPET_MMAP_DEFAULT. + hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. On x86-64 and powerpc, this option can be specified diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 14219972c745..fa3243d71c76 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -522,10 +522,16 @@ config HPET_MMAP If you say Y here, user applications will be able to mmap the HPET registers. +config HPET_MMAP_DEFAULT + bool "Enable HPET MMAP access by default" + default y + depends on HPET_MMAP + help In some hardware implementations, the page containing HPET registers may also contain other things that shouldn't be - exposed to the user. If this applies to your hardware, - say N here. + exposed to the user. This option selects the default (if + kernel parameter hpet_mmap is not set) user access to the + registers for applications that require it. config HANGCHECK_TIMER tristate "Hangcheck timer" diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index dca5834685cf..5d9c31dfc905 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -367,12 +367,29 @@ static unsigned int hpet_poll(struct file *file, poll_table * wait) return 0; } +#ifdef CONFIG_HPET_MMAP +#ifdef CONFIG_HPET_MMAP_DEFAULT +static int hpet_mmap_enabled = 1; +#else +static int hpet_mmap_enabled = 0; +#endif + +static __init int hpet_mmap_enable(char *str) +{ + get_option(&str, &hpet_mmap_enabled); + pr_info("HPET mmap %s\n", hpet_mmap_enabled ? "enabled" : "disabled"); + return 1; +} +__setup("hpet_mmap", hpet_mmap_enable); + static int hpet_mmap(struct file *file, struct vm_area_struct *vma) { -#ifdef CONFIG_HPET_MMAP struct hpet_dev *devp; unsigned long addr; + if (!hpet_mmap_enabled) + return -EACCES; + devp = file->private_data; addr = devp->hd_hpets->hp_hpet_phys; @@ -381,10 +398,13 @@ static int hpet_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); return vm_iomap_memory(vma, addr, PAGE_SIZE); +} #else +static int hpet_mmap(struct file *file, struct vm_area_struct *vma) +{ return -ENOSYS; -#endif } +#endif static int hpet_fasync(int fd, struct file *file, int on) { -- cgit v1.2.3 From 312b4e226951f707e120b95b118cbc14f3d162b2 Mon Sep 17 00:00:00 2001 From: Ryan Mallon Date: Tue, 12 Nov 2013 15:08:51 -0800 Subject: vsprintf: check real user/group id for %pK Some setuid binaries will allow reading of files which have read permission by the real user id. This is problematic with files which use %pK because the file access permission is checked at open() time, but the kptr_restrict setting is checked at read() time. If a setuid binary opens a %pK file as an unprivileged user, and then elevates permissions before reading the file, then kernel pointer values may be leaked. This happens for example with the setuid pppd application on Ubuntu 12.04: $ head -1 /proc/kallsyms 00000000 T startup_32 $ pppd file /proc/kallsyms pppd: In file /proc/kallsyms: unrecognized option 'c1000000' This will only leak the pointer value from the first line, but other setuid binaries may leak more information. Fix this by adding a check that in addition to the current process having CAP_SYSLOG, that effective user and group ids are equal to the real ids. If a setuid binary reads the contents of a file which uses %pK then the pointer values will be printed as NULL if the real user is unprivileged. Update the sysctl documentation to reflect the changes, and also correct the documentation to state the kptr_restrict=0 is the default. This is a only temporary solution to the issue. The correct solution is to do the permission check at open() time on files, and to replace %pK with a function which checks the open() time permission. %pK uses in printk should be removed since no sane permission check can be done, and instead protected by using dmesg_restrict. Signed-off-by: Ryan Mallon Cc: Kees Cook Cc: Alexander Viro Cc: Joe Perches Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 25 ++++++++++++++++++------- lib/vsprintf.c | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 4273b2d71a27..26b7ee491df8 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -290,13 +290,24 @@ Default value is "/sbin/hotplug". kptr_restrict: This toggle indicates whether restrictions are placed on -exposing kernel addresses via /proc and other interfaces. When -kptr_restrict is set to (0), there are no restrictions. When -kptr_restrict is set to (1), the default, kernel pointers -printed using the %pK format specifier will be replaced with 0's -unless the user has CAP_SYSLOG. When kptr_restrict is set to -(2), kernel pointers printed using %pK will be replaced with 0's -regardless of privileges. +exposing kernel addresses via /proc and other interfaces. + +When kptr_restrict is set to (0), the default, there are no restrictions. + +When kptr_restrict is set to (1), kernel pointers printed using the %pK +format specifier will be replaced with 0's unless the user has CAP_SYSLOG +and effective user and group ids are equal to the real ids. This is +because %pK checks are done at read() time rather than open() time, so +if permissions are elevated between the open() and the read() (e.g via +a setuid binary) then %pK will not leak kernel pointers to unprivileged +users. Note, this is a temporary solution only. The correct long-term +solution is to do the permission checks at open() time. Consider removing +world read permissions from files that use %pK, and using dmesg_restrict +to protect against uses of %pK in dmesg(8) if leaking kernel pointer +values to unprivileged users is a concern. + +When kptr_restrict is set to (2), kernel pointers printed using +%pK will be replaced with 0's regardless of privileges. ============================================================== diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 26559bdb4c49..d76555c45ff4 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include /* for PAGE_SIZE */ @@ -1312,11 +1313,37 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, spec.field_width = default_width; return string(buf, end, "pK-error", spec); } - if (!((kptr_restrict == 0) || - (kptr_restrict == 1 && - has_capability_noaudit(current, CAP_SYSLOG)))) + + switch (kptr_restrict) { + case 0: + /* Always print %pK values */ + break; + case 1: { + /* + * Only print the real pointer value if the current + * process has CAP_SYSLOG and is running with the + * same credentials it started with. This is because + * access to files is checked at open() time, but %pK + * checks permission at read() time. We don't want to + * leak pointer values if a binary opens a file using + * %pK and then elevates privileges before reading it. + */ + const struct cred *cred = current_cred(); + + if (!has_capability_noaudit(current, CAP_SYSLOG) || + !uid_eq(cred->euid, cred->uid) || + !gid_eq(cred->egid, cred->gid)) + ptr = NULL; + break; + } + case 2: + default: + /* Always print 0's for %pK */ ptr = NULL; + break; + } break; + case 'N': switch (fmt[1]) { case 'F': -- cgit v1.2.3 From b300645aec9a5d71f6d85d5138215a9d6d7fe5e6 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 12 Nov 2013 15:08:56 -0800 Subject: MAINTAINERS: update Zwane Mwaikambo's e-mail address Zwane Mwaikambo's @arm.linux.org.uk address no longer works. In February 2013 he asked for his gmail address to be used instead [1] so let's just do that. [1] http://marc.info/?l=linux-kernel&m=136079068903214&w=2 Signed-off-by: Jean Delvare Cc: Russell King Cc: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- CREDITS | 2 +- Documentation/cpu-hotplug.txt | 2 +- MAINTAINERS | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/CREDITS b/CREDITS index dc04d8f80a89..4fc997d58ab2 100644 --- a/CREDITS +++ b/CREDITS @@ -2576,7 +2576,7 @@ S: Toronto, Ontario S: Canada N: Zwane Mwaikambo -E: zwane@arm.linux.org.uk +E: zwanem@gmail.com D: Various driver hacking D: Lowlevel x86 kernel hacking D: General debugging diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 786dc82f98ce..8cb9938cc47e 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -5,7 +5,7 @@ Rusty Russell Srivatsa Vaddagiri i386: - Zwane Mwaikambo + Zwane Mwaikambo ppc64: Nathan Lynch Joel Schopp diff --git a/MAINTAINERS b/MAINTAINERS index 30c1e3913cb3..5cfa91b1974a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7319,7 +7319,7 @@ S: Odd Fixes F: drivers/media/usb/tlg2300/ SC1200 WDT DRIVER -M: Zwane Mwaikambo +M: Zwane Mwaikambo S: Maintained F: drivers/watchdog/sc1200wdt.c -- cgit v1.2.3 From 5812c13a4e636da4bd7f7cabbbbc59d9dbf3c86c Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Tue, 12 Nov 2013 15:08:57 -0800 Subject: backlight: lp855x_bl: support new LP8555 device LP8555 is one of the LP855x family devices. This device needs pre_init_device() and post_init_device() driver structure. It's same as LP8557, so the device configuration code is shared with LP8557. Backlight outputs are generated from dual DC-DC boost converters. It's configurable EPROM settings which are defined in the platform data. Driver documentation and device tree bindings are updated. Signed-off-by: Milo Kim Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/backlight/lp855x-driver.txt | 5 ++-- .../devicetree/bindings/video/backlight/lp855x.txt | 29 +++++++++++++++++++++- drivers/video/backlight/Kconfig | 4 +-- drivers/video/backlight/lp855x_bl.c | 17 +++++++++++-- include/linux/platform_data/lp855x.h | 19 ++++++++++++++ 5 files changed, 67 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt index 1c732f0c6758..01bce243d3d7 100644 --- a/Documentation/backlight/lp855x-driver.txt +++ b/Documentation/backlight/lp855x-driver.txt @@ -4,7 +4,8 @@ Kernel driver lp855x Backlight driver for LP855x ICs Supported chips: - Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557 + Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and + LP8557 Author: Milo(Woogyom) Kim @@ -24,7 +25,7 @@ Value : pwm based or register based 2) chip_id The lp855x chip id. -Value : lp8550/lp8551/lp8552/lp8553/lp8556/lp8557 +Value : lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557 Platform data for lp855x ------------------------ diff --git a/Documentation/devicetree/bindings/video/backlight/lp855x.txt b/Documentation/devicetree/bindings/video/backlight/lp855x.txt index 1482103d288f..96e83a56048e 100644 --- a/Documentation/devicetree/bindings/video/backlight/lp855x.txt +++ b/Documentation/devicetree/bindings/video/backlight/lp855x.txt @@ -2,7 +2,7 @@ lp855x bindings Required properties: - compatible: "ti,lp8550", "ti,lp8551", "ti,lp8552", "ti,lp8553", - "ti,lp8556", "ti,lp8557" + "ti,lp8555", "ti,lp8556", "ti,lp8557" - reg: I2C slave address (u8) - dev-ctrl: Value of DEVICE CONTROL register (u8). It depends on the device. @@ -15,6 +15,33 @@ Optional properties: Example: + /* LP8555 */ + backlight@2c { + compatible = "ti,lp8555"; + reg = <0x2c>; + + dev-ctrl = /bits/ 8 <0x00>; + pwm-period = <10000>; + + /* 4V OV, 4 output LED0 string enabled */ + rom_14h { + rom-addr = /bits/ 8 <0x14>; + rom-val = /bits/ 8 <0xcf>; + }; + + /* Heavy smoothing, 24ms ramp time step */ + rom_15h { + rom-addr = /bits/ 8 <0x15>; + rom-val = /bits/ 8 <0xc7>; + }; + + /* 4 output LED1 string enabled */ + rom_19h { + rom-addr = /bits/ 8 <0x19>; + rom-val = /bits/ 8 <0x0f>; + }; + }; + /* LP8556 */ backlight@2c { compatible = "ti,lp8556"; diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index d4a7a351d67c..a65dd063ecad 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -388,8 +388,8 @@ config BACKLIGHT_LP855X tristate "Backlight driver for TI LP855X" depends on BACKLIGHT_CLASS_DEVICE && I2C help - This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557 - backlight driver. + This supports TI LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and + LP8557 backlight driver. config BACKLIGHT_LP8788 tristate "Backlight driver for TI LP8788 MFD" diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c index c0b41f13bd4a..c952175d4113 100644 --- a/drivers/video/backlight/lp855x_bl.c +++ b/drivers/video/backlight/lp855x_bl.c @@ -26,13 +26,15 @@ #define LP8556_EPROM_START 0xA0 #define LP8556_EPROM_END 0xAF -/* LP8557 Registers */ +/* LP8555/7 Registers */ #define LP8557_BL_CMD 0x00 #define LP8557_BL_MASK 0x01 #define LP8557_BL_ON 0x01 #define LP8557_BL_OFF 0x00 #define LP8557_BRIGHTNESS_CTRL 0x04 #define LP8557_CONFIG 0x10 +#define LP8555_EPROM_START 0x10 +#define LP8555_EPROM_END 0x7A #define LP8557_EPROM_START 0x10 #define LP8557_EPROM_END 0x1E @@ -111,6 +113,10 @@ static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr) start = LP8556_EPROM_START; end = LP8556_EPROM_END; break; + case LP8555: + start = LP8555_EPROM_START; + end = LP8555_EPROM_END; + break; case LP8557: start = LP8557_EPROM_START; end = LP8557_EPROM_END; @@ -165,9 +171,14 @@ static int lp855x_configure(struct lp855x *lp) struct lp855x_platform_data *pd = lp->pdata; switch (lp->chip_id) { - case LP8550 ... LP8556: + case LP8550: + case LP8551: + case LP8552: + case LP8553: + case LP8556: lp->cfg = &lp855x_dev_cfg; break; + case LP8555: case LP8557: lp->cfg = &lp8557_dev_cfg; break; @@ -470,6 +481,7 @@ static const struct of_device_id lp855x_dt_ids[] = { { .compatible = "ti,lp8551", }, { .compatible = "ti,lp8552", }, { .compatible = "ti,lp8553", }, + { .compatible = "ti,lp8555", }, { .compatible = "ti,lp8556", }, { .compatible = "ti,lp8557", }, { } @@ -481,6 +493,7 @@ static const struct i2c_device_id lp855x_ids[] = { {"lp8551", LP8551}, {"lp8552", LP8552}, {"lp8553", LP8553}, + {"lp8555", LP8555}, {"lp8556", LP8556}, {"lp8557", LP8557}, { } diff --git a/include/linux/platform_data/lp855x.h b/include/linux/platform_data/lp855x.h index ea3200527dd3..1b2ba24e4e03 100644 --- a/include/linux/platform_data/lp855x.h +++ b/include/linux/platform_data/lp855x.h @@ -40,6 +40,17 @@ #define LP8553_PWM_CONFIG LP8550_PWM_CONFIG #define LP8553_I2C_CONFIG LP8550_I2C_CONFIG +/* CONFIG register - LP8555 */ +#define LP8555_PWM_STANDBY BIT(7) +#define LP8555_PWM_FILTER BIT(6) +#define LP8555_RELOAD_EPROM BIT(3) /* use it if EPROMs should be reset + when the backlight turns on */ +#define LP8555_OFF_OPENLEDS BIT(2) +#define LP8555_PWM_CONFIG LP8555_PWM_ONLY +#define LP8555_I2C_CONFIG LP8555_I2C_ONLY +#define LP8555_COMB1_CONFIG LP8555_COMBINED1 +#define LP8555_COMB2_CONFIG LP8555_COMBINED2 + /* DEVICE CONTROL register - LP8556 */ #define LP8556_PWM_CONFIG (LP8556_PWM_ONLY << BRT_MODE_SHFT) #define LP8556_COMB1_CONFIG (LP8556_COMBINED1 << BRT_MODE_SHFT) @@ -65,6 +76,7 @@ enum lp855x_chip_id { LP8551, LP8552, LP8553, + LP8555, LP8556, LP8557, }; @@ -89,6 +101,13 @@ enum lp8553_brighntess_source { LP8553_I2C_ONLY = LP8550_I2C_ONLY, }; +enum lp8555_brightness_source { + LP8555_PWM_ONLY, + LP8555_I2C_ONLY, + LP8555_COMBINED1, /* Brightness register with shaped PWM */ + LP8555_COMBINED2, /* PWM with shaped brightness register */ +}; + enum lp8556_brightness_source { LP8556_PWM_ONLY, LP8556_COMBINED1, /* pwm + i2c before the shaper block */ -- cgit v1.2.3 From 965fd2968a5daf66c21a21a539528f6d3ec7c53b Mon Sep 17 00:00:00 2001 From: Luis Ortega Perez de Villar Date: Tue, 12 Nov 2013 15:11:10 -0800 Subject: Documentation/filesystems/vfat.txt: fix directory entry example Slots can store up to 13 characters for the file name but one of the examples has one character too many. Signed-off-by: Luis Ortega Perez de Villar Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/vfat.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index aa1f459fa6cf..4a93e98b290a 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -307,7 +307,7 @@ the following: - + -- cgit v1.2.3 From c7708649ccbdc21e85c95ca1a01b28342d939d39 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Tue, 12 Nov 2013 15:11:11 -0800 Subject: Documentation/trace/tracepoints.txt: add links to TRACE_EVENT documentation Existing tracepoint documentation doesn't mention the popular TRACE_EVENT macro. Since an excellent series of articles on proper usage already exists, respective links are added to the existing documentation. Signed-off-by: Stefan Raspl Cc: Rob Landley Cc: Jiri Kosina Acked-by: Mathieu Desnoyers Cc: Zoltan Kiss Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/trace/tracepoints.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.txt index ac4170dd0f24..6b018b53177a 100644 --- a/Documentation/trace/tracepoints.txt +++ b/Documentation/trace/tracepoints.txt @@ -114,3 +114,8 @@ core kernel image or in modules. If the tracepoint has to be used in kernel modules, an EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be used to export the defined tracepoints. + +Note: The convenience macro TRACE_EVENT provides an alternative way to + define tracepoints. Check http://lwn.net/Articles/379903, + http://lwn.net/Articles/381064 and http://lwn.net/Articles/383362 + for a series of articles with more details. -- cgit v1.2.3 From ee2f51dc018e4dee06c6a6d52e821e27d701d650 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Tue, 12 Nov 2013 15:11:14 -0800 Subject: Documentation/ABI: document the non-ABI status of Kconfig and symbols Discussion at Kernel Summit made it clear that the presence or absence of specific Kconfig symbols are not considered ABI, and that no userspace (or bootloader, etc) should rely on them. In addition, kernel-internal symbols are well established as non-ABI, per Documentation/stable_api_nonsense.txt. Document both of these in Documentation/ABI/README, in a new section for notable bits of non-ABI. Signed-off-by: Josh Triplett Cc: Rob Landley Cc: Tao Ma Cc: Greg Kroah-Hartman Acked-by: H. Peter Anvin Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/README | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'Documentation') diff --git a/Documentation/ABI/README b/Documentation/ABI/README index 10069828568b..1fafc4b0753b 100644 --- a/Documentation/ABI/README +++ b/Documentation/ABI/README @@ -72,3 +72,16 @@ kernel tree without going through the obsolete state first. It's up to the developer to place their interfaces in the category they wish for it to start out in. + + +Notable bits of non-ABI, which should not under any circumstances be considered +stable: + +- Kconfig. Userspace should not rely on the presence or absence of any + particular Kconfig symbol, in /proc/config.gz, in the copy of .config + commonly installed to /boot, or in any invocation of the kernel build + process. + +- Kernel-internal symbols. Do not rely on the presence, absence, location, or + type of any kernel symbol, either in System.map files or the kernel binary + itself. See Documentation/stable_api_nonsense.txt. -- cgit v1.2.3 From 17c568d60af5a810208baf116dc174a2005c6c3e Mon Sep 17 00:00:00 2001 From: Frantisek Hrbata Date: Tue, 12 Nov 2013 15:11:27 -0800 Subject: gcov: compile specific gcov implementation based on gcc version Compile the correct gcov implementation file for the specific gcc version. Signed-off-by: Frantisek Hrbata Cc: Jan Stancek Cc: Kees Cook Acked-by: Peter Oberparleiter Cc: Rusty Russell Cc: Arnd Bergmann Cc: Andy Gospodarek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gcov.txt | 4 ++++ kernel/gcov/Kconfig | 30 ++++++++++++++++++++++++++++++ kernel/gcov/Makefile | 32 +++++++++++++++++++++++++++++++- 3 files changed, 65 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt index e7ca6478cd93..7b727783db7e 100644 --- a/Documentation/gcov.txt +++ b/Documentation/gcov.txt @@ -50,6 +50,10 @@ Configure the kernel with: CONFIG_DEBUG_FS=y CONFIG_GCOV_KERNEL=y +select the gcc's gcov format, default is autodetect based on gcc version: + + CONFIG_GCOV_FORMAT_AUTODETECT=y + and to get coverage data for the entire kernel: CONFIG_GCOV_PROFILE_ALL=y diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index d4da55d1fb65..d04ce8ac4399 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -46,4 +46,34 @@ config GCOV_PROFILE_ALL larger and run slower. Also be sure to exclude files from profiling which are not linked to the kernel image to prevent linker errors. +choice + prompt "Specify GCOV format" + depends on GCOV_KERNEL + default GCOV_FORMAT_AUTODETECT + ---help--- + The gcov format is usually determined by the GCC version, but there are + exceptions where format changes are integrated in lower-version GCCs. + In such a case use this option to adjust the format used in the kernel + accordingly. + + If unsure, choose "Autodetect". + +config GCOV_FORMAT_AUTODETECT + bool "Autodetect" + ---help--- + Select this option to use the format that corresponds to your GCC + version. + +config GCOV_FORMAT_3_4 + bool "GCC 3.4 format" + ---help--- + Select this option to use the format defined by GCC 3.4. + +config GCOV_FORMAT_4_7 + bool "GCC 4.7 format" + ---help--- + Select this option to use the format defined by GCC 4.7. + +endchoice + endmenu diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index e97ca59e2520..52aa7e8de927 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -1,3 +1,33 @@ ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' -obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o +# if-lt +# Usage VAR := $(call if-lt, $(a), $(b)) +# Returns 1 if (a < b) +if-lt = $(shell [ $(1) -lt $(2) ] && echo 1) + +ifeq ($(CONFIG_GCOV_FORMAT_3_4),y) + cc-ver := 0304 +else ifeq ($(CONFIG_GCOV_FORMAT_4_7),y) + cc-ver := 0407 +else +# Use cc-version if available, otherwise set 0 +# +# scripts/Kbuild.include, which contains cc-version function, is not included +# during make clean "make -f scripts/Makefile.clean obj=kernel/gcov" +# Meaning cc-ver is empty causing if-lt test to fail with +# "/bin/sh: line 0: [: -lt: unary operator expected" error mesage. +# This has no affect on the clean phase, but the error message could be +# confusing/annoying. So this dummy workaround sets cc-ver to zero if cc-version +# is not available. We can probably move if-lt to Kbuild.include, so it's also +# not defined during clean or to include Kbuild.include in +# scripts/Makefile.clean. But the following workaround seems least invasive. + cc-ver := $(if $(call cc-version),$(call cc-version),0) +endif + +obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o + +ifeq ($(call if-lt, $(cc-ver), 0407),1) + obj-$(CONFIG_GCOV_KERNEL) += gcc_3_4.o +else + obj-$(CONFIG_GCOV_KERNEL) += gcc_4_7.o +endif -- cgit v1.2.3