From e454cea20bdcff10ee698d11b8882662a0153a47 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Fri, 18 Sep 2009 23:01:12 +0200 Subject: Driver-Core: extend devnode callbacks to provide permissions This allows subsytems to provide devtmpfs with non-default permissions for the device node. Instead of the default mode of 0600, null, zero, random, urandom, full, tty, ptmx now have a mode of 0666, which allows non-privileged processes to access standard device nodes in case no other userspace process applies the expected permissions. This also fixes a wrong assignment in pktcdvd and a checkpatch.pl complain. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 19 ++++++++++++------- drivers/base/devtmpfs.c | 24 ++++++++++++++++-------- 2 files changed, 28 insertions(+), 15 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/core.c b/drivers/base/core.c index 390e664ec1c7..6bee6af8d8e1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -166,13 +166,16 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, if (MAJOR(dev->devt)) { const char *tmp; const char *name; + mode_t mode = 0; add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt)); add_uevent_var(env, "MINOR=%u", MINOR(dev->devt)); - name = device_get_nodename(dev, &tmp); + name = device_get_devnode(dev, &mode, &tmp); if (name) { add_uevent_var(env, "DEVNAME=%s", name); kfree(tmp); + if (mode) + add_uevent_var(env, "DEVMODE=%#o", mode & 0777); } } @@ -1148,8 +1151,9 @@ static struct device *next_device(struct klist_iter *i) } /** - * device_get_nodename - path of device node file + * device_get_devnode - path of device node file * @dev: device + * @mode: returned file access mode * @tmp: possibly allocated string * * Return the relative path of a possible device node. @@ -1157,21 +1161,22 @@ static struct device *next_device(struct klist_iter *i) * a name. This memory is returned in tmp and needs to be * freed by the caller. */ -const char *device_get_nodename(struct device *dev, const char **tmp) +const char *device_get_devnode(struct device *dev, + mode_t *mode, const char **tmp) { char *s; *tmp = NULL; /* the device type may provide a specific name */ - if (dev->type && dev->type->nodename) - *tmp = dev->type->nodename(dev); + if (dev->type && dev->type->devnode) + *tmp = dev->type->devnode(dev, mode); if (*tmp) return *tmp; /* the class may provide a specific name */ - if (dev->class && dev->class->nodename) - *tmp = dev->class->nodename(dev); + if (dev->class && dev->class->devnode) + *tmp = dev->class->devnode(dev, mode); if (*tmp) return *tmp; diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index fd488ad4263a..a1cb5afe6801 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -6,9 +6,10 @@ * During bootup, before any driver core device is registered, * devtmpfs, a tmpfs-based filesystem is created. Every driver-core * device which requests a device node, will add a node in this - * filesystem. The node is named after the the name of the device, - * or the susbsytem can provide a custom name. All devices are - * owned by root and have a mode of 0600. + * filesystem. + * By default, all devices are named after the the name of the + * device, owned by root and have a default mode of 0600. Subsystems + * can overwrite the default setting if needed. */ #include @@ -20,6 +21,7 @@ #include #include #include +#include #include static struct vfsmount *dev_mnt; @@ -134,7 +136,7 @@ int devtmpfs_create_node(struct device *dev) const char *tmp = NULL; const char *nodename; const struct cred *curr_cred; - mode_t mode; + mode_t mode = 0; struct nameidata nd; struct dentry *dentry; int err; @@ -142,14 +144,16 @@ int devtmpfs_create_node(struct device *dev) if (!dev_mnt) return 0; - nodename = device_get_nodename(dev, &tmp); + nodename = device_get_devnode(dev, &mode, &tmp); if (!nodename) return -ENOMEM; + if (mode == 0) + mode = 0600; if (is_blockdev(dev)) - mode = S_IFBLK|0600; + mode |= S_IFBLK; else - mode = S_IFCHR|0600; + mode |= S_IFCHR; curr_cred = override_creds(&init_cred); err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, @@ -165,8 +169,12 @@ int devtmpfs_create_node(struct device *dev) dentry = lookup_create(&nd, 0); if (!IS_ERR(dentry)) { + int umask; + + umask = sys_umask(0000); err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, dev->devt); + sys_umask(umask); /* mark as kernel created inode */ if (!err) dentry->d_inode->i_private = &dev_mnt; @@ -271,7 +279,7 @@ int devtmpfs_delete_node(struct device *dev) if (!dev_mnt) return 0; - nodename = device_get_nodename(dev, &tmp); + nodename = device_get_devnode(dev, NULL, &tmp); if (!nodename) return -ENOMEM; -- cgit v1.2.3 From c6a7f5728a1db45d30df55a01adc130b4ab0327c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 21 Sep 2009 17:01:32 -0700 Subject: mm: oom analysis: Show kernel stack usage in /proc/meminfo and OOM log output The amount of memory allocated to kernel stacks can become significant and cause OOM conditions. However, we do not display the amount of memory consumed by stacks. Add code to display the amount of memory used for stacks in /proc/meminfo. Signed-off-by: KOSAKI Motohiro Reviewed-by: Christoph Lameter Reviewed-by: Minchan Kim Reviewed-by: Rik van Riel Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 3 +++ fs/proc/meminfo.c | 2 ++ include/linux/mmzone.h | 3 ++- kernel/fork.c | 11 +++++++++++ mm/page_alloc.c | 3 +++ mm/vmstat.c | 1 + 6 files changed, 22 insertions(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index 91d4087b4039..b560c17f6d4e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -85,6 +85,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, "Node %d FilePages: %8lu kB\n" "Node %d Mapped: %8lu kB\n" "Node %d AnonPages: %8lu kB\n" + "Node %d KernelStack: %8lu kB\n" "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" @@ -116,6 +117,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev, nid, K(node_page_state(nid, NR_FILE_PAGES)), nid, K(node_page_state(nid, NR_FILE_MAPPED)), nid, K(node_page_state(nid, NR_ANON_PAGES)), + nid, node_page_state(nid, NR_KERNEL_STACK) * + THREAD_SIZE / 1024, nid, K(node_page_state(nid, NR_PAGETABLE)), nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), nid, K(node_page_state(nid, NR_BOUNCE)), diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index d5c410d47fae..1fc588f430e4 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -84,6 +84,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "Slab: %8lu kB\n" "SReclaimable: %8lu kB\n" "SUnreclaim: %8lu kB\n" + "KernelStack: %8lu kB\n" "PageTables: %8lu kB\n" #ifdef CONFIG_QUICKLIST "Quicklists: %8lu kB\n" @@ -128,6 +129,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_SLAB_RECLAIMABLE)), K(global_page_state(NR_SLAB_UNRECLAIMABLE)), + global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024, K(global_page_state(NR_PAGETABLE)), #ifdef CONFIG_QUICKLIST K(quicklist_total_size()), diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 889598537370..d9335b8de84a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -94,10 +94,11 @@ enum zone_stat_item { NR_SLAB_RECLAIMABLE, NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ + NR_KERNEL_STACK, + /* Second 128 byte cacheline */ NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, NR_VMSCAN_WRITE, - /* Second 128 byte cacheline */ NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ diff --git a/kernel/fork.c b/kernel/fork.c index 2cebfb23b0b8..d4638c8cc19e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -136,9 +136,17 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; +static void account_kernel_stack(struct thread_info *ti, int account) +{ + struct zone *zone = page_zone(virt_to_page(ti)); + + mod_zone_page_state(zone, NR_KERNEL_STACK, account); +} + void free_task(struct task_struct *tsk) { prop_local_destroy_single(&tsk->dirties); + account_kernel_stack(tsk->stack, -1); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); @@ -253,6 +261,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->btrace_seq = 0; #endif tsk->splice_pipe = NULL; + + account_kernel_stack(ti, 1); + return tsk; out: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 494c09196c30..4e050f325ebd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2177,6 +2177,7 @@ void show_free_areas(void) " mapped:%lukB" " slab_reclaimable:%lukB" " slab_unreclaimable:%lukB" + " kernel_stack:%lukB" " pagetables:%lukB" " unstable:%lukB" " bounce:%lukB" @@ -2201,6 +2202,8 @@ void show_free_areas(void) K(zone_page_state(zone, NR_FILE_MAPPED)), K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)), K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)), + zone_page_state(zone, NR_KERNEL_STACK) * + THREAD_SIZE / 1024, K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_UNSTABLE_NFS)), K(zone_page_state(zone, NR_BOUNCE)), diff --git a/mm/vmstat.c b/mm/vmstat.c index 138bed53706e..ceda39b63d7e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -639,6 +639,7 @@ static const char * const vmstat_text[] = { "nr_slab_reclaimable", "nr_slab_unreclaimable", "nr_page_table_pages", + "nr_kernel_stack", "nr_unstable", "nr_bounce", "nr_vmscan_write", -- cgit v1.2.3 From 4b02108ac1b3354a22b0d83c684797692efdc395 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 21 Sep 2009 17:01:33 -0700 Subject: mm: oom analysis: add shmem vmstat Recently we encountered OOM problems due to memory use of the GEM cache. Generally a large amuont of Shmem/Tmpfs pages tend to create a memory shortage problem. We often use the following calculation to determine the amount of shmem pages: shmem = NR_ACTIVE_ANON + NR_INACTIVE_ANON - NR_ANON_PAGES however the expression does not consider isolated and mlocked pages. This patch adds explicit accounting for pages used by shmem and tmpfs. Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Reviewed-by: Christoph Lameter Acked-by: Wu Fengguang Cc: David Rientjes Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 2 ++ fs/proc/meminfo.c | 2 ++ include/linux/mmzone.h | 1 + mm/filemap.c | 4 ++++ mm/migrate.c | 5 ++++- mm/page_alloc.c | 5 ++++- mm/vmstat.c | 2 +- 7 files changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index b560c17f6d4e..1fe5536d404f 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -85,6 +85,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, "Node %d FilePages: %8lu kB\n" "Node %d Mapped: %8lu kB\n" "Node %d AnonPages: %8lu kB\n" + "Node %d Shmem: %8lu kB\n" "Node %d KernelStack: %8lu kB\n" "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" @@ -117,6 +118,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, nid, K(node_page_state(nid, NR_FILE_PAGES)), nid, K(node_page_state(nid, NR_FILE_MAPPED)), nid, K(node_page_state(nid, NR_ANON_PAGES)), + nid, K(node_page_state(nid, NR_SHMEM)), nid, node_page_state(nid, NR_KERNEL_STACK) * THREAD_SIZE / 1024, nid, K(node_page_state(nid, NR_PAGETABLE)), diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 1fc588f430e4..171e052c07b3 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -81,6 +81,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "Writeback: %8lu kB\n" "AnonPages: %8lu kB\n" "Mapped: %8lu kB\n" + "Shmem: %8lu kB\n" "Slab: %8lu kB\n" "SReclaimable: %8lu kB\n" "SUnreclaim: %8lu kB\n" @@ -125,6 +126,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(global_page_state(NR_WRITEBACK)), K(global_page_state(NR_ANON_PAGES)), K(global_page_state(NR_FILE_MAPPED)), + K(global_page_state(NR_SHMEM)), K(global_page_state(NR_SLAB_RECLAIMABLE) + global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_SLAB_RECLAIMABLE)), diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d9335b8de84a..b3583b93b77e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -100,6 +100,7 @@ enum zone_stat_item { NR_BOUNCE, NR_VMSCAN_WRITE, NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ + NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ diff --git a/mm/filemap.c b/mm/filemap.c index dd51c68e2b86..bcc7372aebbc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -119,6 +119,8 @@ void __remove_from_page_cache(struct page *page) page->mapping = NULL; mapping->nrpages--; __dec_zone_page_state(page, NR_FILE_PAGES); + if (PageSwapBacked(page)) + __dec_zone_page_state(page, NR_SHMEM); BUG_ON(page_mapped(page)); /* @@ -431,6 +433,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); + if (PageSwapBacked(page)) + __inc_zone_page_state(page, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; diff --git a/mm/migrate.c b/mm/migrate.c index 0edeac91348d..37143b924484 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -312,7 +312,10 @@ static int migrate_page_move_mapping(struct address_space *mapping, */ __dec_zone_page_state(page, NR_FILE_PAGES); __inc_zone_page_state(newpage, NR_FILE_PAGES); - + if (PageSwapBacked(page)) { + __dec_zone_page_state(page, NR_SHMEM); + __inc_zone_page_state(newpage, NR_SHMEM); + } spin_unlock_irq(&mapping->tree_lock); return 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e050f325ebd..e50c22545b8f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2139,7 +2139,7 @@ void show_free_areas(void) " unevictable:%lu" " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n" " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" - " mapped:%lu pagetables:%lu bounce:%lu\n", + " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), global_page_state(NR_ACTIVE_FILE), global_page_state(NR_INACTIVE_ANON), @@ -2153,6 +2153,7 @@ void show_free_areas(void) global_page_state(NR_SLAB_RECLAIMABLE), global_page_state(NR_SLAB_UNRECLAIMABLE), global_page_state(NR_FILE_MAPPED), + global_page_state(NR_SHMEM), global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE)); @@ -2175,6 +2176,7 @@ void show_free_areas(void) " dirty:%lukB" " writeback:%lukB" " mapped:%lukB" + " shmem:%lukB" " slab_reclaimable:%lukB" " slab_unreclaimable:%lukB" " kernel_stack:%lukB" @@ -2200,6 +2202,7 @@ void show_free_areas(void) K(zone_page_state(zone, NR_FILE_DIRTY)), K(zone_page_state(zone, NR_WRITEBACK)), K(zone_page_state(zone, NR_FILE_MAPPED)), + K(zone_page_state(zone, NR_SHMEM)), K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)), K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)), zone_page_state(zone, NR_KERNEL_STACK) * diff --git a/mm/vmstat.c b/mm/vmstat.c index ceda39b63d7e..7214a4511257 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -644,7 +644,7 @@ static const char * const vmstat_text[] = { "nr_bounce", "nr_vmscan_write", "nr_writeback_temp", - + "nr_shmem", #ifdef CONFIG_NUMA "numa_hit", "numa_miss", -- cgit v1.2.3