From 2a7684a23e9c263c2a1e8b2c0027ad1836a0f9df Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Sep 2009 11:50:12 +0200 Subject: HWPOISON: check and isolate corrupted free pages v2
If memory corruption hits the free buddy pages, we can safely ignore them. No one will access them until page allocation time; then prep_new_page() will automatically check and isolate the PG_hwpoison page for us (for 0-order allocations). This patch expands prep_new_page() to check every component page in a high order page allocation, in order to completely stop PG_hwpoison pages from being recirculated. Note that the common case -- allocating a single page -- doesn't do any more work than before. Allocating > order 0 does a bit more work, but that's relatively uncommon. This simple implementation may drop some innocent neighbor pages; hopefully that is not a big problem, because the event should be rare enough. This patch adds some runtime costs to high order page users. [AK: Improved description] v2: Andi Kleen: Port to -mm code Move check into separate function. Don't dump stack in bad_pages for hwpoisoned pages.
Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- mm/page_alloc.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c')
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a0de15f46987..9faa7ad95ac5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -234,6 +234,12 @@ static void bad_page(struct page *page) static unsigned long nr_shown; static unsigned long nr_unshown; + /* Don't complain about poisoned pages */ + if (PageHWPoison(page)) { + __ClearPageBuddy(page); + return; + } + /* * Allow a burst of 60 reports, then keep quiet for that minute; * or allow a steady drip of one report per second. @@ -646,7 +652,7 @@ static inline void expand(struct zone *zone, struct page *page, /* * This page is about to be returned from the page allocator */ -static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) +static inline int check_new_page(struct page *page) { if (unlikely(page_mapcount(page) | (page->mapping != NULL) | @@ -655,6 +661,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) bad_page(page); return 1; } + return 0; +} + +static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) +{ + int i; + + for (i = 0; i < (1 << order); i++) { + struct page *p = page + i; + if (unlikely(check_new_page(p))) + return 1; + } set_page_private(page, 0); set_page_refcounted(page); -- cgit v1.2.3
From 478b81fd84a299adb401dbbae296f3767e552999 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 21 Sep 2009 17:01:15 -0700 Subject: mm: remove obsoleted alloc_pages cpuset comment
When a cpuset's nodemask is updated, all attached tasks have their cached task->mems_allowed updated by a heap instead of requiring an explicit call to cpuset_update_task_memory_state(), which has since been removed in 58568d2a8215cb6f55caf2332017d7bdff954e1c ("cpuset,mm: update tasks' mems_allowed in time"). Remove the obsoleted comment from the page allocator.
Cc: Paul Menage Acked-by: Miao Xie Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'mm/page_alloc.c')
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a0de15f46987..f4e929e356db 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1627,10 +1627,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, /* We now go into synchronous reclaim */ cpuset_memory_pressure_bump(); - - /* - * The task's cpuset might have expanded its set of allowable nodes - */ p->flags |= PF_MEMALLOC; lockdep_set_current_reclaim_state(gfp_mask); reclaim_state.reclaimed_slab = 0; -- cgit v1.2.3
From 112067f0905b2de862c607ee62411cf47d2fe5c4 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 21 Sep 2009 17:01:16 -0700 Subject: memory hotplug: update zone pcp at memory online
In my test, 128M of memory was hot added, but the zone's pcp batch was 0, which is an obvious error. When pages are onlined, the zone pcp should be updated accordingly. [akpm@linux-foundation.org: fix warnings]
Signed-off-by: Shaohua Li Cc: Mel Gorman Cc: Christoph Lameter Cc: Yakui Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ mm/memory_hotplug.c | 1 + mm/page_alloc.c | 26 ++++++++++++++++++++++++++ 3 files changed, 29 insertions(+) (limited to 'mm/page_alloc.c')
diff --git a/include/linux/mm.h b/include/linux/mm.h index 9a72cc78e6b8..d3c8ae7c8015 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1058,6 +1058,8 @@ extern void setup_per_cpu_pageset(void); static inline void setup_per_cpu_pageset(void) {} #endif +extern void zone_pcp_update(struct zone *zone); + /* nommu.c */ extern atomic_long_t mmap_pages_allocated;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e4412a676c88..616236e6343f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -422,6 +422,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) zone->present_pages += onlined_pages; zone->zone_pgdat->node_present_pages += onlined_pages; + zone_pcp_update(zone); setup_per_zone_wmarks(); calculate_zone_inactive_ratio(zone); if (onlined_pages) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f4e929e356db..1a3a893ef50e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3142,6 +3142,32 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) return 0; } +static int __zone_pcp_update(void *data) +{ + struct zone *zone = data; + int cpu; + unsigned long batch = zone_batchsize(zone), flags; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct per_cpu_pageset *pset; + struct per_cpu_pages *pcp; + + pset = zone_pcp(zone, cpu); + pcp = &pset->pcp; + + local_irq_save(flags); + free_pages_bulk(zone, pcp->count, &pcp->list, 0); + setup_pageset(pset, batch); + local_irq_restore(flags); + } + return 0; +} + +void zone_pcp_update(struct zone *zone) +{ + stop_machine(__zone_pcp_update, zone, NULL); +} + static __meminit void zone_pcp_init(struct zone *zone) { int cpu; -- cgit v1.2.3
From 6fb332fabd7288af9dbe7992394aa6ba97c1a537 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 21 Sep 2009 17:01:17 -0700 Subject: memory hotplug: exclude isolated page from pcp page alloc
Pages marked as isolated should not be allocated again. If such pages reside in the pcp list, they can still be allocated, so there is a ping-pong: memory offline frees some pages to the pcp list, the pages get allocated, and then memory offline frees them again; this loop happens again and again.
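For context, a sketch of the offline sequence that produces the ping-pong (function names as in the 2.6.31-era hotplug code; locking and error handling omitted):

    /* Memory offline, simplified: mark the range isolated, then
     * flush the per-cpu lists so free pages land on the buddy's
     * MIGRATE_ISOLATE free list. */
    set_migratetype_isolate(page);
    drain_all_pages();
    /* Without this patch, an isolated page still sitting on a pcp
     * list can be handed out by the allocation fast path, undoing
     * the isolation and forcing offline to free it yet again. */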
This should have no impact on the normal code path: there, pages in the pcp list aren't isolated, and the loop below breaks at the first entry.
Signed-off-by: Shaohua Li Cc: Mel Gorman Cc: Christoph Lameter Cc: Yakui Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c')
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1a3a893ef50e..8a8302711725 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1143,10 +1143,20 @@ again: /* Allocate more to the pcp list if necessary */ if (unlikely(&page->lru == &pcp->list)) { + int get_one_page = 0; + pcp->count += rmqueue_bulk(zone, 0, pcp->batch, &pcp->list, migratetype, cold); - page = list_entry(pcp->list.next, struct page, lru); + list_for_each_entry(page, &pcp->list, lru) { + if (get_pageblock_migratetype(page) != + MIGRATE_ISOLATE) { + get_one_page = 1; + break; + } + } + if (!get_one_page) + goto failed; } list_del(&page->lru); -- cgit v1.2.3
From 8e7e40d9658cf7b2ae2b76484e235799b3ddaa97 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 21 Sep 2009 17:01:18 -0700 Subject: memory hotplug: make pages from movable zone always isolatable
Pages in the movable zone have two types, MIGRATE_MOVABLE and MIGRATE_RESERVE; both of them are movable, because only movable memory allocations can get pages from the movable zone. This makes pages in the movable zone always able to migrate.
Signed-off-by: Shaohua Li Cc: Mel Gorman Cc: Christoph Lameter Cc: Yakui Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c')
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8a8302711725..7b4d4e4ea64f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4906,13 +4906,16 @@ int set_migratetype_isolate(struct page *page) struct zone *zone; unsigned long flags; int ret = -EBUSY; + int zone_idx; zone = page_zone(page); + zone_idx = zone_idx(zone); spin_lock_irqsave(&zone->lock, flags); /* * In future, more migrate types will be able to be isolation target. */ - if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE) + if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE && + zone_idx != ZONE_MOVABLE) goto out; set_pageblock_migratetype(page, MIGRATE_ISOLATE); move_freepages_block(zone, page, MIGRATE_ISOLATE); -- cgit v1.2.3
From 55a4462af5722d2814858bc51ee8d58ca29544ab Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Sep 2009 17:01:20 -0700 Subject: page_alloc: fix kernel-doc warning
Unmark the function as having kernel-doc notation, fixing the kernel-doc warning. Warning(mm/page_alloc.c:4519): No description found for parameter 'zone'
Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c')
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7b4d4e4ea64f..91ae36c30330 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4541,7 +4541,7 @@ void setup_per_zone_wmarks(void) calculate_totalreserve_pages(); } -/** +/* * The inactive anon list should be small enough that the VM never has to * do too much work, but large enough that each inactive page has a chance to be referenced again before it is swapped out.
-- cgit v1.2.3 From 3701b0332330ca1add3e5d56513ef201ff7efdbb Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 21 Sep 2009 17:01:29 -0700 Subject: mm: show_free_areas(): display slab pages in two separate fields If an OOM happens, we really want to know the number of remaining reclaimable pages. So the reclaimable slab and unreclaimable slab fields should not be combined for display. Signed-off-by: KOSAKI Motohiro Reviewed-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 91ae36c30330..bdf12f36bae0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2138,7 +2138,8 @@ void show_free_areas(void) " inactive_file:%lu" " unevictable:%lu" " dirty:%lu writeback:%lu unstable:%lu\n" - " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", + " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" + " mapped:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), global_page_state(NR_ACTIVE_FILE), global_page_state(NR_INACTIVE_ANON), @@ -2148,8 +2149,8 @@ void show_free_areas(void) global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), global_page_state(NR_FREE_PAGES), - global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE), + global_page_state(NR_SLAB_RECLAIMABLE), + global_page_state(NR_SLAB_UNRECLAIMABLE), global_page_state(NR_FILE_MAPPED), global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE)); -- cgit v1.2.3 From 4a0aa73f1d613bf19bc8610bf090c941ef49d720 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 21 Sep 2009 17:01:30 -0700 Subject: mm: oom analysis: add per-zone statistics to show_free_areas() show_free_areas() displays only a limited amount of zone counters. This patch includes additional counters in the display to allow easier debugging. This may be especially useful if an OOM is due to running out of DMA memory. Signed-off-by: KOSAKI Motohiro Reviewed-by: Christoph Lameter Acked-by: Wu Fengguang Reviewed-by: Minchan Kim Reviewed-by: Rik van Riel Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bdf12f36bae0..8fbf5a4f5cf7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2170,6 +2170,16 @@ void show_free_areas(void) " inactive_file:%lukB" " unevictable:%lukB" " present:%lukB" + " mlocked:%lukB" + " dirty:%lukB" + " writeback:%lukB" + " mapped:%lukB" + " slab_reclaimable:%lukB" + " slab_unreclaimable:%lukB" + " pagetables:%lukB" + " unstable:%lukB" + " bounce:%lukB" + " writeback_tmp:%lukB" " pages_scanned:%lu" " all_unreclaimable? %s" "\n", @@ -2184,6 +2194,16 @@ void show_free_areas(void) K(zone_page_state(zone, NR_INACTIVE_FILE)), K(zone_page_state(zone, NR_UNEVICTABLE)), K(zone->present_pages), + K(zone_page_state(zone, NR_MLOCK)), + K(zone_page_state(zone, NR_FILE_DIRTY)), + K(zone_page_state(zone, NR_WRITEBACK)), + K(zone_page_state(zone, NR_FILE_MAPPED)), + K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)), + K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)), + K(zone_page_state(zone, NR_PAGETABLE)), + K(zone_page_state(zone, NR_UNSTABLE_NFS)), + K(zone_page_state(zone, NR_BOUNCE)), + K(zone_page_state(zone, NR_WRITEBACK_TEMP)), zone->pages_scanned, (zone_is_all_unreclaimable(zone) ? 
"yes" : "no") ); -- cgit v1.2.3 From 71de1ccbe1fb40203edd3beb473f8580d917d2ca Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 21 Sep 2009 17:01:31 -0700 Subject: mm: oom analysis: add buffer cache information to show_free_areas() It is often useful to know the statistics for all pages that are handled like page cache pages when looking at OOM log output. Therefore show_free_areas() should also display buffer cache statistics. Signed-off-by: KOSAKI Motohiro Acked-by: Wu Fengguang Reviewed-by: Rik van Riel Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8fbf5a4f5cf7..494c09196c30 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2137,7 +2137,7 @@ void show_free_areas(void) printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n" " inactive_file:%lu" " unevictable:%lu" - " dirty:%lu writeback:%lu unstable:%lu\n" + " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n" " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" " mapped:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), @@ -2148,6 +2148,7 @@ void show_free_areas(void) global_page_state(NR_FILE_DIRTY), global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), + nr_blockdev_pages(), global_page_state(NR_FREE_PAGES), global_page_state(NR_SLAB_RECLAIMABLE), global_page_state(NR_SLAB_UNRECLAIMABLE), -- cgit v1.2.3 From c6a7f5728a1db45d30df55a01adc130b4ab0327c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 21 Sep 2009 17:01:32 -0700 Subject: mm: oom analysis: Show kernel stack usage in /proc/meminfo and OOM log output The amount of memory allocated to kernel stacks can become significant and cause OOM conditions. However, we do not display the amount of memory consumed by stacks. Add code to display the amount of memory used for stacks in /proc/meminfo. 
Signed-off-by: KOSAKI Motohiro Reviewed-by: Christoph Lameter Reviewed-by: Minchan Kim Reviewed-by: Rik van Riel Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 3 +++ fs/proc/meminfo.c | 2 ++ include/linux/mmzone.h | 3 ++- kernel/fork.c | 11 +++++++++++ mm/page_alloc.c | 3 +++ mm/vmstat.c | 1 + 6 files changed, 22 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 91d4087b4039..b560c17f6d4e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -85,6 +85,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, "Node %d FilePages: %8lu kB\n" "Node %d Mapped: %8lu kB\n" "Node %d AnonPages: %8lu kB\n" + "Node %d KernelStack: %8lu kB\n" "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" @@ -116,6 +117,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev, nid, K(node_page_state(nid, NR_FILE_PAGES)), nid, K(node_page_state(nid, NR_FILE_MAPPED)), nid, K(node_page_state(nid, NR_ANON_PAGES)), + nid, node_page_state(nid, NR_KERNEL_STACK) * + THREAD_SIZE / 1024, nid, K(node_page_state(nid, NR_PAGETABLE)), nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), nid, K(node_page_state(nid, NR_BOUNCE)), diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index d5c410d47fae..1fc588f430e4 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -84,6 +84,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "Slab: %8lu kB\n" "SReclaimable: %8lu kB\n" "SUnreclaim: %8lu kB\n" + "KernelStack: %8lu kB\n" "PageTables: %8lu kB\n" #ifdef CONFIG_QUICKLIST "Quicklists: %8lu kB\n" @@ -128,6 +129,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_SLAB_RECLAIMABLE)), K(global_page_state(NR_SLAB_UNRECLAIMABLE)), + global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024, K(global_page_state(NR_PAGETABLE)), #ifdef CONFIG_QUICKLIST K(quicklist_total_size()), diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 889598537370..d9335b8de84a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -94,10 +94,11 @@ enum zone_stat_item { NR_SLAB_RECLAIMABLE, NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ + NR_KERNEL_STACK, + /* Second 128 byte cacheline */ NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, NR_VMSCAN_WRITE, - /* Second 128 byte cacheline */ NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ diff --git a/kernel/fork.c b/kernel/fork.c index 2cebfb23b0b8..d4638c8cc19e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -136,9 +136,17 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; +static void account_kernel_stack(struct thread_info *ti, int account) +{ + struct zone *zone = page_zone(virt_to_page(ti)); + + mod_zone_page_state(zone, NR_KERNEL_STACK, account); +} + void free_task(struct task_struct *tsk) { prop_local_destroy_single(&tsk->dirties); + account_kernel_stack(tsk->stack, -1); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); @@ -253,6 +261,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->btrace_seq = 0; #endif tsk->splice_pipe = NULL; + + account_kernel_stack(ti, 1); + return tsk; out: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 494c09196c30..4e050f325ebd 100644 --- 
a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2177,6 +2177,7 @@ void show_free_areas(void) " mapped:%lukB" " slab_reclaimable:%lukB" " slab_unreclaimable:%lukB" + " kernel_stack:%lukB" " pagetables:%lukB" " unstable:%lukB" " bounce:%lukB" @@ -2201,6 +2202,8 @@ void show_free_areas(void) K(zone_page_state(zone, NR_FILE_MAPPED)), K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)), K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)), + zone_page_state(zone, NR_KERNEL_STACK) * + THREAD_SIZE / 1024, K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_UNSTABLE_NFS)), K(zone_page_state(zone, NR_BOUNCE)),
diff --git a/mm/vmstat.c b/mm/vmstat.c index 138bed53706e..ceda39b63d7e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -639,6 +639,7 @@ static const char * const vmstat_text[] = { "nr_slab_reclaimable", "nr_slab_unreclaimable", "nr_page_table_pages", + "nr_kernel_stack", "nr_unstable", "nr_bounce", "nr_vmscan_write", -- cgit v1.2.3
From 4b02108ac1b3354a22b0d83c684797692efdc395 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 21 Sep 2009 17:01:33 -0700 Subject: mm: oom analysis: add shmem vmstat
Recently we encountered OOM problems due to memory use of the GEM cache. Generally, a large amount of Shmem/Tmpfs pages tends to create a memory shortage problem. We often use the following calculation to determine the amount of shmem pages: shmem = NR_ACTIVE_ANON + NR_INACTIVE_ANON - NR_ANON_PAGES; however, the expression does not consider isolated and mlocked pages. This patch adds explicit accounting for pages used by shmem and tmpfs.
Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Reviewed-by: Christoph Lameter Acked-by: Wu Fengguang Cc: David Rientjes Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 2 ++ fs/proc/meminfo.c | 2 ++ include/linux/mmzone.h | 1 + mm/filemap.c | 4 ++++ mm/migrate.c | 5 ++++- mm/page_alloc.c | 5 ++++- mm/vmstat.c | 2 +- 7 files changed, 18 insertions(+), 3 deletions(-) (limited to 'mm/page_alloc.c')
diff --git a/drivers/base/node.c b/drivers/base/node.c index b560c17f6d4e..1fe5536d404f 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -85,6 +85,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, "Node %d FilePages: %8lu kB\n" "Node %d Mapped: %8lu kB\n" "Node %d AnonPages: %8lu kB\n" + "Node %d Shmem: %8lu kB\n" "Node %d KernelStack: %8lu kB\n" "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" @@ -117,6 +118,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, nid, K(node_page_state(nid, NR_FILE_PAGES)), nid, K(node_page_state(nid, NR_FILE_MAPPED)), nid, K(node_page_state(nid, NR_ANON_PAGES)), + nid, K(node_page_state(nid, NR_SHMEM)), nid, node_page_state(nid, NR_KERNEL_STACK) * THREAD_SIZE / 1024, nid, K(node_page_state(nid, NR_PAGETABLE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 1fc588f430e4..171e052c07b3 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -81,6 +81,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "Writeback: %8lu kB\n" "AnonPages: %8lu kB\n" "Mapped: %8lu kB\n" + "Shmem: %8lu kB\n" "Slab: %8lu kB\n" "SReclaimable: %8lu kB\n" "SUnreclaim: %8lu kB\n" @@ -125,6 +126,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(global_page_state(NR_WRITEBACK)), K(global_page_state(NR_ANON_PAGES)), K(global_page_state(NR_FILE_MAPPED)), + K(global_page_state(NR_SHMEM)), K(global_page_state(NR_SLAB_RECLAIMABLE) + global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_SLAB_RECLAIMABLE)), diff --git
a/include/linux/mmzone.h b/include/linux/mmzone.h index d9335b8de84a..b3583b93b77e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -100,6 +100,7 @@ enum zone_stat_item { NR_BOUNCE, NR_VMSCAN_WRITE, NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ + NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ diff --git a/mm/filemap.c b/mm/filemap.c index dd51c68e2b86..bcc7372aebbc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -119,6 +119,8 @@ void __remove_from_page_cache(struct page *page) page->mapping = NULL; mapping->nrpages--; __dec_zone_page_state(page, NR_FILE_PAGES); + if (PageSwapBacked(page)) + __dec_zone_page_state(page, NR_SHMEM); BUG_ON(page_mapped(page)); /* @@ -431,6 +433,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); + if (PageSwapBacked(page)) + __inc_zone_page_state(page, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; diff --git a/mm/migrate.c b/mm/migrate.c index 0edeac91348d..37143b924484 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -312,7 +312,10 @@ static int migrate_page_move_mapping(struct address_space *mapping, */ __dec_zone_page_state(page, NR_FILE_PAGES); __inc_zone_page_state(newpage, NR_FILE_PAGES); - + if (PageSwapBacked(page)) { + __dec_zone_page_state(page, NR_SHMEM); + __inc_zone_page_state(newpage, NR_SHMEM); + } spin_unlock_irq(&mapping->tree_lock); return 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e050f325ebd..e50c22545b8f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2139,7 +2139,7 @@ void show_free_areas(void) " unevictable:%lu" " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n" " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" - " mapped:%lu pagetables:%lu bounce:%lu\n", + " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), global_page_state(NR_ACTIVE_FILE), global_page_state(NR_INACTIVE_ANON), @@ -2153,6 +2153,7 @@ void show_free_areas(void) global_page_state(NR_SLAB_RECLAIMABLE), global_page_state(NR_SLAB_UNRECLAIMABLE), global_page_state(NR_FILE_MAPPED), + global_page_state(NR_SHMEM), global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE)); @@ -2175,6 +2176,7 @@ void show_free_areas(void) " dirty:%lukB" " writeback:%lukB" " mapped:%lukB" + " shmem:%lukB" " slab_reclaimable:%lukB" " slab_unreclaimable:%lukB" " kernel_stack:%lukB" @@ -2200,6 +2202,7 @@ void show_free_areas(void) K(zone_page_state(zone, NR_FILE_DIRTY)), K(zone_page_state(zone, NR_WRITEBACK)), K(zone_page_state(zone, NR_FILE_MAPPED)), + K(zone_page_state(zone, NR_SHMEM)), K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)), K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)), zone_page_state(zone, NR_KERNEL_STACK) * diff --git a/mm/vmstat.c b/mm/vmstat.c index ceda39b63d7e..7214a4511257 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -644,7 +644,7 @@ static const char * const vmstat_text[] = { "nr_bounce", "nr_vmscan_write", "nr_writeback_temp", - + "nr_shmem", #ifdef CONFIG_NUMA "numa_hit", "numa_miss", -- cgit v1.2.3 From b259fbde0a86085264c89aa2ce9c6e35792a1aad Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 21 Sep 2009 17:01:34 -0700 Subject: mm: update alloc_flags after oom killer has been called It is possible for the oom killer to select current as the task to kill. 
When this happens, alloc_flags needs to be updated accordingly to set ALLOC_NO_WATERMARKS so the subsequent allocation attempt may use memory reserves as the result of its thread having TIF_MEMDIE set if the allocation is not __GFP_NOMEMALLOC. Acked-by: Mel Gorman Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e50c22545b8f..b6d0d09557ef 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1771,6 +1771,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, wake_all_kswapd(order, zonelist, high_zoneidx); +restart: /* * OK, we're below the kswapd watermark and have kicked background * reclaim. Now things get more complex, so set up alloc_flags according @@ -1778,7 +1779,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, */ alloc_flags = gfp_to_alloc_flags(gfp_mask); -restart: /* This is the last chance, in general, before the goto nopage. */ page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, -- cgit v1.2.3 From a731286de62294b63d8ceb3c5914ac52cc17e690 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 21 Sep 2009 17:01:37 -0700 Subject: mm: vmstat: add isolate pages If the system is running a heavy load of processes then concurrent reclaim can isolate a large number of pages from the LRU. /proc/vmstat and the output generated for an OOM do not show how many pages were isolated. This has been observed during process fork bomb testing (mstctl11 in LTP). This patch shows the information about isolated pages. Reproduced via: ----------------------- % ./hackbench 140 process 1000 => OOM occur active_anon:146 inactive_anon:0 isolated_anon:49245 active_file:79 inactive_file:18 isolated_file:113 unevictable:0 dirty:0 writeback:0 unstable:0 buffer:39 free:370 slab_reclaimable:309 slab_unreclaimable:5492 mapped:53 shmem:15 pagetables:28140 bounce:0 Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Acked-by: Wu Fengguang Reviewed-by: Minchan Kim Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ mm/migrate.c | 11 +++++++++++ mm/page_alloc.c | 12 +++++++++--- mm/vmscan.c | 12 +++++++++++- mm/vmstat.c | 2 ++ 5 files changed, 35 insertions(+), 4 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b3583b93b77e..9c50309b30a1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -100,6 +100,8 @@ enum zone_stat_item { NR_BOUNCE, NR_VMSCAN_WRITE, NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ + NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ + NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ diff --git a/mm/migrate.c b/mm/migrate.c index 37143b924484..b535a2c1656c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -67,6 +67,8 @@ int putback_lru_pages(struct list_head *l) list_for_each_entry_safe(page, page2, l, lru) { list_del(&page->lru); + dec_zone_page_state(page, NR_ISOLATED_ANON + + !!page_is_file_cache(page)); putback_lru_page(page); count++; } @@ -698,6 +700,8 @@ unlock: * restored. 
*/ list_del(&page->lru); + dec_zone_page_state(page, NR_ISOLATED_ANON + + !!page_is_file_cache(page)); putback_lru_page(page); } @@ -742,6 +746,13 @@ int migrate_pages(struct list_head *from, struct page *page2; int swapwrite = current->flags & PF_SWAPWRITE; int rc; + unsigned long flags; + + local_irq_save(flags); + list_for_each_entry(page, from, lru) + __inc_zone_page_state(page, NR_ISOLATED_ANON + + !!page_is_file_cache(page)); + local_irq_restore(flags); if (!swapwrite) current->flags |= PF_SWAPWRITE; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b6d0d09557ef..afda8fd16484 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2134,16 +2134,18 @@ void show_free_areas(void) } } - printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n" - " inactive_file:%lu" + printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" + " active_file:%lu inactive_file:%lu isolated_file:%lu\n" " unevictable:%lu" " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n" " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), - global_page_state(NR_ACTIVE_FILE), global_page_state(NR_INACTIVE_ANON), + global_page_state(NR_ISOLATED_ANON), + global_page_state(NR_ACTIVE_FILE), global_page_state(NR_INACTIVE_FILE), + global_page_state(NR_ISOLATED_FILE), global_page_state(NR_UNEVICTABLE), global_page_state(NR_FILE_DIRTY), global_page_state(NR_WRITEBACK), @@ -2171,6 +2173,8 @@ void show_free_areas(void) " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" + " isolated(anon):%lukB" + " isolated(file):%lukB" " present:%lukB" " mlocked:%lukB" " dirty:%lukB" @@ -2197,6 +2201,8 @@ void show_free_areas(void) K(zone_page_state(zone, NR_ACTIVE_FILE)), K(zone_page_state(zone, NR_INACTIVE_FILE)), K(zone_page_state(zone, NR_UNEVICTABLE)), + K(zone_page_state(zone, NR_ISOLATED_ANON)), + K(zone_page_state(zone, NR_ISOLATED_FILE)), K(zone->present_pages), K(zone_page_state(zone, NR_MLOCK)), K(zone_page_state(zone, NR_FILE_DIRTY)), diff --git a/mm/vmscan.c b/mm/vmscan.c index d86a91f8c16b..75c29974e878 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1072,6 +1072,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, unsigned long nr_active; unsigned int count[NR_LRU_LISTS] = { 0, }; int mode = lumpy_reclaim ? 
ISOLATE_BOTH : ISOLATE_INACTIVE; + unsigned long nr_anon; + unsigned long nr_file; nr_taken = sc->isolate_pages(sc->swap_cluster_max, &page_list, &nr_scan, sc->order, mode, @@ -1102,6 +1104,10 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, __mod_zone_page_state(zone, NR_INACTIVE_ANON, -count[LRU_INACTIVE_ANON]); + nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; + nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; + __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon); + __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file); reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON]; reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON]; @@ -1169,6 +1175,9 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, spin_lock_irq(&zone->lru_lock); } } + __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon); + __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file); + } while (nr_scanned < max_scan); done: @@ -1279,6 +1288,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken); else __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken); + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); spin_unlock_irq(&zone->lru_lock); while (!list_empty(&l_hold)) { @@ -1329,7 +1339,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, LRU_ACTIVE + file * LRU_FILE); move_active_pages_to_lru(zone, &l_inactive, LRU_BASE + file * LRU_FILE); - + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); } diff --git a/mm/vmstat.c b/mm/vmstat.c index 7214a4511257..c81321f9feec 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -644,6 +644,8 @@ static const char * const vmstat_text[] = { "nr_bounce", "nr_vmscan_write", "nr_writeback_temp", + "nr_isolated_anon", + "nr_isolated_file", "nr_shmem", #ifdef CONFIG_NUMA "numa_hit", -- cgit v1.2.3 From 945a11136ebdfa7fcce319ee6215958e84cb85f6 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 21 Sep 2009 17:01:47 -0700 Subject: mm: add gfp mask checking for __get_free_pages() __get_free_pages() with __GFP_HIGHMEM is not safe because the return address cannot represent a highmem page. get_zeroed_page() already has such a debug checking. 
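The failure mode being guarded against, sketched with a hypothetical caller (GFP_HIGHUSER includes __GFP_HIGHMEM; kmap() shown as the correct alternative):

    /* Buggy (hypothetical): a highmem page may have no permanent
     * kernel mapping, so the returned address would be meaningless. */
    unsigned long addr = __get_free_pages(GFP_HIGHUSER, 0);

    /* Correct: take the struct page and map it explicitly. */
    struct page *page = alloc_pages(GFP_HIGHUSER, 0);
    void *vaddr = kmap(page);
    /* ... use vaddr ... */
    kunmap(page);

As a side-effect, the patch turns get_zeroed_page() into a trivial wrapper around __get_free_pages(), so both entry points share the single VM_BUG_ON check.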
Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index afda8fd16484..81926c7ef6f0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1922,31 +1922,25 @@ EXPORT_SYMBOL(__alloc_pages_nodemask); */ unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) { - struct page * page; + struct page *page; + + /* + * __get_free_pages() returns a 32-bit address, which cannot represent + * a highmem page + */ + VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); + page = alloc_pages(gfp_mask, order); if (!page) return 0; return (unsigned long) page_address(page); } - EXPORT_SYMBOL(__get_free_pages); unsigned long get_zeroed_page(gfp_t gfp_mask) { - struct page * page; - - /* - * get_zeroed_page() returns a 32-bit address, which cannot represent - * a highmem page - */ - VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); - - page = alloc_pages(gfp_mask | __GFP_ZERO, 0); - if (page) - return (unsigned long) page_address(page); - return 0; + return __get_free_pages(gfp_mask | __GFP_ZERO, 0); } - EXPORT_SYMBOL(get_zeroed_page); void __pagevec_free(struct pagevec *pvec) -- cgit v1.2.3 From 451ea25da71590361c71bf3044c55b870a887d53 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 21 Sep 2009 17:01:48 -0700 Subject: mm: perform non-atomic test-clear of PG_mlocked on free By the time PG_mlocked is cleared in the page freeing path, nobody else is looking at our page->flags anymore. It is thus safe to make the test-and-clear non-atomic and thereby removing an unnecessary and expensive operation from a hotpath. Signed-off-by: Johannes Weiner Reviewed-by: Christoph Lameter Reviewed-by: KOSAKI Motohiro Cc: Christoph Lameter Cc: Mel Gorman Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 12 +++++++++--- mm/page_alloc.c | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2b87acfc5f87..d07c0bb2203a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -158,6 +158,9 @@ static inline int TestSetPage##uname(struct page *page) \ static inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &page->flags); } +#define __TESTCLEARFLAG(uname, lname) \ +static inline int __TestClearPage##uname(struct page *page) \ + { return __test_and_clear_bit(PG_##lname, &page->flags); } #define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname) @@ -184,6 +187,9 @@ static inline void __ClearPage##uname(struct page *page) { } #define TESTCLEARFLAG_FALSE(uname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } +#define __TESTCLEARFLAG_FALSE(uname) \ +static inline int __TestClearPage##uname(struct page *page) { return 0; } + struct page; /* forward declaration */ TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked) @@ -250,11 +256,11 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT #define MLOCK_PAGES 1 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) - TESTSCFLAG(Mlocked, mlocked) + TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked) #else #define MLOCK_PAGES 0 -PAGEFLAG_FALSE(Mlocked) - SETPAGEFLAG_NOOP(Mlocked) 
TESTCLEARFLAG_FALSE(Mlocked) +PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) + TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 81926c7ef6f0..9242d13f4ff3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -557,7 +557,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) unsigned long flags; int i; int bad = 0; - int wasMlocked = TestClearPageMlocked(page); + int wasMlocked = __TestClearPageMlocked(page); kmemcheck_free_shadow(page, order); @@ -1026,7 +1026,7 @@ static void free_hot_cold_page(struct page *page, int cold) struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; unsigned long flags; - int wasMlocked = TestClearPageMlocked(page); + int wasMlocked = __TestClearPageMlocked(page); kmemcheck_free_shadow(page, 0); -- cgit v1.2.3
From 2f66a68f3fac2e94da360c342ff78ab45553f86c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:31 -0700 Subject: page-allocator: change migratetype for all pageblocks within a high-order page during __rmqueue_fallback
When there are no pages of a target migratetype free, the page allocator selects a high-order block of another migratetype to allocate from. When the order of the page taken is greater than pageblock_order, all pageblocks within that high-order page should change migratetype so that pages are later freed to the correct free-lists. The current behaviour is that pageblocks change migratetype only if the order being split matches the pageblock_order. When pageblock_order < MAX_ORDER-1, ownership is not changed correctly, pages are later freed to the incorrect list, and this impacts fragmentation avoidance. This patch changes all pageblocks within the high-order page being split to the correct migratetype. Without the patch, allocation success rates for hugepages under stress were about 59% of physical memory on x86-64. With the patch applied, this goes up to 65%.
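A worked example of the fix's arithmetic (values assume x86-64 defaults, where pageblock_order is 9 and MAX_ORDER is 11):

    /* Sketch: an order-10 page taken by the fallback path spans
     * 1 << (10 - 9) == 2 pageblocks, and both must be retagged. */
    int start_order = 10;       /* MAX_ORDER - 1 */
    int nr_pageblocks = 1 << (start_order - pageblock_order);

Before the patch, the migratetype changed only when current_order == pageblock_order, so an order-10 split left both of its pageblocks with stale ownership.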
Signed-off-by: Mel Gorman Cc: Andy Whitcroft Cc: KOSAKI Motohiro Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c')
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9242d13f4ff3..20759803a64a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -783,6 +783,17 @@ static int move_freepages_block(struct zone *zone, struct page *page, return move_freepages(zone, start_page, end_page, migratetype); } +static void change_pageblock_range(struct page *pageblock_page, + int start_order, int migratetype) +{ + int nr_pageblocks = 1 << (start_order - pageblock_order); + + while (nr_pageblocks--) { + set_pageblock_migratetype(pageblock_page, migratetype); + pageblock_page += pageblock_nr_pages; + } +} + /* Remove an element from the buddy allocator from the fallback list */ static inline struct page * __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) @@ -836,8 +847,9 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) list_del(&page->lru); rmv_page_order(page); - if (current_order == pageblock_order) - set_pageblock_migratetype(page, + /* Take ownership for orders >= pageblock_order */ + if (current_order >= pageblock_order) + change_pageblock_range(page, current_order, start_migratetype); expand(zone, page, order, current_order, area, migratetype); -- cgit v1.2.3
From 38a398572fa2d8124f7479e40db581b5b72719c9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:39 -0700 Subject: page-allocator: remove dead function free_cold_page()
The function free_cold_page() has no callers so delete it.
Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 1 - mm/page_alloc.c | 5 ----- 2 files changed, 6 deletions(-) (limited to 'mm/page_alloc.c')
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 7c777a0da17a..c32bfa8e7f1e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -326,7 +326,6 @@ void free_pages_exact(void *virt, size_t size); extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); extern void free_hot_page(struct page *page); -extern void free_cold_page(struct page *page); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr),0)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 20759803a64a..913a8ebd3a8e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1079,11 +1079,6 @@ void free_hot_page(struct page *page) free_hot_cold_page(page, 0); } -void free_cold_page(struct page *page) -{ - free_hot_cold_page(page, 1); -} - /* * split_page takes a non-compound higher-order page, and splits it into * n (1<<order) sub-pages: page[0..n] * Each sub-page must be freed individually. -- cgit v1.2.3
From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:41 -0700 Subject: tracing, page-allocator: add trace events for page allocation and page freeing
This patch adds trace events for the allocation and freeing of pages, including the freeing of pagevecs. Using the events, it will be known what struct page and pfns are being allocated and freed and what the call site was in many cases. The page alloc tracepoints can be used as an indicator as to whether the workload was heavily dependent on the page allocator or not. You can make a guess based on vmstat but you can't get a per-process breakdown. Depending on the call path, the call_site for page allocation may be __get_free_pages() instead of a useful callsite.
Instead of passing down a return address similar to slab debugging, the user should enable the stacktrace and sym-addr options to get a proper stack trace. The pagevec free tracepoint has a different use case. It can be used to get an idea of how many pages are being dumped off the LRU and whether it is kswapd doing the work or a process doing direct reclaim.
Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 74 +++++++++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 7 ++++- 2 files changed, 80 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c')
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 1493c541f9c4..0d358a0d45c1 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -225,6 +225,80 @@ TRACE_EVENT(kmem_cache_free, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); + +TRACE_EVENT(mm_page_free_direct, + + TP_PROTO(struct page *page, unsigned int order), + + TP_ARGS(page, order), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( unsigned int, order ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + ), + + TP_printk("page=%p pfn=%lu order=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->order) +); + +TRACE_EVENT(mm_pagevec_free, + + TP_PROTO(struct page *page, int cold), + + TP_ARGS(page, cold), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( int, cold ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->cold = cold; + ), + + TP_printk("page=%p pfn=%lu order=0 cold=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->cold) +); + +TRACE_EVENT(mm_page_alloc, + + TP_PROTO(struct page *page, unsigned int order, + gfp_t gfp_flags, int migratetype), + + TP_ARGS(page, order, gfp_flags, migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( unsigned int, order ) + __field( gfp_t, gfp_flags ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + __entry->gfp_flags = gfp_flags; + __entry->migratetype = migratetype; + ), + + TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", + __entry->page, + page_to_pfn(__entry->page), + __entry->order, + __entry->migratetype, + show_gfp_flags(__entry->gfp_flags)) +); + #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 913a8ebd3a8e..80f954d82d77 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1076,6 +1076,7 @@ static void free_hot_cold_page(struct page *page, int cold) void free_hot_page(struct page *page) { + trace_mm_page_free_direct(page, 0); free_hot_cold_page(page, 0); } @@ -1920,6 +1921,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); + trace_mm_page_alloc(page, order, gfp_mask, migratetype); return page; } EXPORT_SYMBOL(__alloc_pages_nodemask); @@ -1954,13 +1956,16 @@ void __pagevec_free(struct pagevec *pvec) { int i = pagevec_count(pvec); - while (--i >= 0) + while (--i >= 0) { + trace_mm_pagevec_free(pvec->pages[i], pvec->cold); free_hot_cold_page(pvec->pages[i], pvec->cold); + } } void __free_pages(struct page *page, unsigned int order) { if (put_page_testzero(page)) { + trace_mm_page_free_direct(page, order); if (order == 0)
free_hot_page(page); else -- cgit v1.2.3
From e0fff1bd12469c45dab088e353d8882761387bb6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:42 -0700 Subject: tracing, page-allocator: add trace events for anti-fragmentation falling back to other migratetypes
Fragmentation avoidance depends on being able to use free pages from lists of the appropriate migrate type. In the event this is not possible, __rmqueue_fallback() selects a different list and in some circumstances changes the migratetype of the pageblock. Simplistically, the more times this event occurs, the more likely it is that fragmentation will be a problem later, for hugepage allocation at least, but there are other considerations, such as the order of the page being split to satisfy the allocation. This patch adds a trace event for __rmqueue_fallback() that reports what page is being used for the fallback, the orders of relevant pages, the desired migratetype and the migratetype of the lists being used, whether the pageblock changed type and whether this event is important with respect to fragmentation avoidance or not. This information can be used to help analyse fragmentation avoidance and help decide whether min_free_kbytes should be increased or not.
Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 38 ++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 4 ++++ 2 files changed, 42 insertions(+) (limited to 'mm/page_alloc.c')
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 0d358a0d45c1..aae16ee17601 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -299,6 +299,44 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); +TRACE_EVENT(mm_page_alloc_extfrag, + + TP_PROTO(struct page *page, + int alloc_order, int fallback_order, + int alloc_migratetype, int fallback_migratetype), + + TP_ARGS(page, + alloc_order, fallback_order, + alloc_migratetype, fallback_migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( int, alloc_order ) + __field( int, fallback_order ) + __field( int, alloc_migratetype ) + __field( int, fallback_migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->alloc_order = alloc_order; + __entry->fallback_order = fallback_order; + __entry->alloc_migratetype = alloc_migratetype; + __entry->fallback_migratetype = fallback_migratetype; + ), + + TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->alloc_order, + __entry->fallback_order, + pageblock_order, + __entry->alloc_migratetype, + __entry->fallback_migratetype, + __entry->fallback_order < pageblock_order, + __entry->alloc_migratetype == __entry->fallback_migratetype) +); + #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 80f954d82d77..77f517c18b37 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -853,6 +853,10 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) start_migratetype); expand(zone, page, order, current_order, area, migratetype); + + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, migratetype); + return page; } } -- cgit v1.2.3
From 0d3d062a6e289e065bd0aa537a6806a1806bf8aa
Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:44 -0700 Subject: tracing, page-allocator: add trace event for page traffic related to the buddy lists
The page allocation trace event reports that a page was successfully allocated but it does not specify where it came from. When analysing performance, it can be important to distinguish between pages coming from the per-cpu allocator and pages coming from the buddy lists, as the latter requires the zone lock to be taken and more data structures to be examined. This patch adds a trace event for __rmqueue reporting when a page is being allocated from the buddy lists. It distinguishes between being called to refill the per-cpu lists and being called for a high-order allocation. Similarly, this patch adds an event to catch when the PCP lists are being drained a little and pages are going back to the buddy lists. This is trickier to draw conclusions from, but high activity on those events could explain why there were a large number of cache misses on a page-allocator-intensive workload. The coalescing and splitting of buddies involves a lot of writing of page metadata and cache line bounces, not to mention the acquisition of an interrupt-safe lock necessary to enter this path. [akpm@linux-foundation.org: fix build]
Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 51 +++++++++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 3 +++ 2 files changed, 54 insertions(+) (limited to 'mm/page_alloc.c')
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index aae16ee17601..eaf46bdd18a5 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -299,6 +299,57 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); +TRACE_EVENT(mm_page_alloc_zone_locked, + + TP_PROTO(struct page *page, unsigned int order, int migratetype), + + TP_ARGS(page, order, migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( unsigned int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + __entry->migratetype = migratetype; + ), + + TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->order, + __entry->migratetype, + __entry->order == 0) +); + +TRACE_EVENT(mm_page_pcpu_drain, + + TP_PROTO(struct page *page, int order, int migratetype), + + TP_ARGS(page, order, migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + __entry->migratetype = migratetype; + ), + + TP_printk("page=%p pfn=%lu order=%d migratetype=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->order, + __entry->migratetype) +); + TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 77f517c18b37..4c847cc57caf 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -48,6 +48,7 @@ #include #include #include +#include <trace/events/kmem.h> #include #include @@ -535,6 +536,7 @@ static void free_pages_bulk(struct zone *zone, int count, page = list_entry(list->prev, struct page, lru); /* have to delete it as __free_one_page list manipulates */ list_del(&page->lru); + trace_mm_page_pcpu_drain(page, order,
page_private(page)); __free_one_page(page, zone, order, page_private(page)); } spin_unlock(&zone->lock); @@ -890,6 +892,7 @@ retry_reserve: } } + trace_mm_page_alloc_zone_locked(page, order, migratetype); return page; } -- cgit v1.2.3
From 78986a678f6ec3759a01976749f4437d8bf2d6c3 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:03:02 -0700 Subject: page-allocator: limit the number of MIGRATE_RESERVE pageblocks per zone
After anti-fragmentation was merged, a bug was reported whereby devices that depended on high-order atomic allocations were failing. The solution was to preserve a property in the buddy allocator which tended to keep the minimum number of free pages in the zone at the lower physical addresses and contiguous. To preserve this property, MIGRATE_RESERVE was introduced and a number of pageblocks at the start of a zone would be marked "reserve", the number of which depended on min_free_kbytes.
Anti-fragmentation works by avoiding the mixing of page migratetypes within the same pageblock. One way of helping this is to increase min_free_kbytes, because it becomes less likely that it will be necessary to place pages of different migratetypes within the same pageblock. However, because the number of MIGRATE_RESERVE pageblocks grows with min_free_kbytes and is unbounded, the free memory is kept there in large contiguous blocks instead of helping anti-fragmentation as much as it should.
With the page-allocator tracepoint patches applied, it was found during anti-fragmentation tests that the number of fragmentation-related events was far higher than expected, even with min_free_kbytes at higher values. This patch limits the number of MIGRATE_RESERVE blocks that exist per zone to two. For example, with a sufficient min_free_kbytes, 4MB of memory will be kept aside on an x86-64 and remain more or less free and contiguous for the system's uptime. This should be sufficient for devices depending on high-order atomic allocations while helping fragmentation control when min_free_kbytes is tuned appropriately. A side-effect of this patch is that the reserve variable is converted to int, as unsigned long was the wrong type to use when ensuring that only the required number of reserve blocks are created. With the patches applied, fragmentation-related events as measured by the page allocator tracepoints were significantly reduced when running some fragmentation stress-tests on systems with min_free_kbytes tuned to a value appropriate for hugepage allocations at runtime. On x86, the events recorded were reduced by 99.8%, on x86-64 by 99.72% and on ppc64 by 99.83%.
Signed-off-by: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c')
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4c847cc57caf..33b1a4762a7b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2836,7 +2836,8 @@ static void setup_zone_migrate_reserve(struct zone *zone) { unsigned long start_pfn, pfn, end_pfn; struct page *page; - unsigned long reserve, block_migratetype; + unsigned long block_migratetype; + int reserve; /* Get the start pfn, end pfn and the number of blocks to reserve */ start_pfn = zone->zone_start_pfn; @@ -2844,6 +2845,15 @@ static void setup_zone_migrate_reserve(struct zone *zone) reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> pageblock_order; + /* + * Reserve blocks are generally in place to help high-order atomic + * allocations that are short-lived.
A min_free_kbytes value that + * would result in more than 2 reserve blocks for atomic allocations + * is assumed to be in place to help anti-fragmentation for the + * future allocation of hugepages at runtime. + */ + reserve = min(2, reserve); + for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { if (!pfn_valid(pfn)) continue; -- cgit v1.2.3 From 2c85f51d222ccdd8c401d77a36b723a89156810d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 21 Sep 2009 17:03:07 -0700 Subject: mm: also use alloc_large_system_hash() for the PID hash table This is being done by allowing boot time allocations to specify that they may want a sub-page sized amount of memory. Overall this seems more consistent with the other hash table allocations, and allows making two supposedly mm-only variables really mm-only (nr_{kernel,all}_pages). Signed-off-by: Jan Beulich Cc: Ingo Molnar Cc: "Eric W. Biederman" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 5 ++--- kernel/pid.c | 15 ++++----------- mm/page_alloc.c | 13 ++++++++++--- 3 files changed, 16 insertions(+), 17 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index bc3ab7073695..dd97fb8408a8 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -132,9 +132,6 @@ static inline void *alloc_remap(int nid, unsigned long size) } #endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */ -extern unsigned long __meminitdata nr_kernel_pages; -extern unsigned long __meminitdata nr_all_pages; - extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, @@ -145,6 +142,8 @@ extern void *alloc_large_system_hash(const char *tablename, unsigned long limit); #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ +#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min + * shift passed via *_hash_shift */ /* Only NUMA needs hash distribution. 64bit NUMA architectures have * sufficient vmalloc space. 
diff --git a/kernel/pid.c b/kernel/pid.c index 31310b5d3f50..d3f722d20f9c 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -40,7 +40,7 @@ #define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) static struct hlist_head *pid_hash; -static int pidhash_shift; +static unsigned int pidhash_shift = 4; struct pid init_struct_pid = INIT_STRUCT_PID; int pid_max = PID_MAX_DEFAULT; @@ -499,19 +499,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) void __init pidhash_init(void) { int i, pidhash_size; - unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT); - pidhash_shift = max(4, fls(megabytes * 4)); - pidhash_shift = min(12, pidhash_shift); + pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18, + HASH_EARLY | HASH_SMALL, + &pidhash_shift, NULL, 4096); pidhash_size = 1 << pidhash_shift; - printk("PID hash table entries: %d (order: %d, %Zd bytes)\n", - pidhash_size, pidhash_shift, - pidhash_size * sizeof(struct hlist_head)); - - pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash))); - if (!pid_hash) - panic("Could not alloc pidhash!\n"); for (i = 0; i < pidhash_size; i++) INIT_HLIST_HEAD(&pid_hash[i]); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 33b1a4762a7b..770f011e1c12 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -124,8 +124,8 @@ static char * const zone_names[MAX_NR_ZONES] = { int min_free_kbytes = 1024; -unsigned long __meminitdata nr_kernel_pages; -unsigned long __meminitdata nr_all_pages; +static unsigned long __meminitdata nr_kernel_pages; +static unsigned long __meminitdata nr_all_pages; static unsigned long __meminitdata dma_reserve; #ifdef CONFIG_ARCH_POPULATES_NODE_MAP @@ -4821,7 +4821,14 @@ void *__init alloc_large_system_hash(const char *tablename, numentries <<= (PAGE_SHIFT - scale); /* Make sure we've got at least a 0-order allocation.. */ - if (unlikely((numentries * bucketsize) < PAGE_SIZE)) + if (unlikely(flags & HASH_SMALL)) { + /* Makes no sense without HASH_EARLY */ + WARN_ON(!(flags & HASH_EARLY)); + if (!(numentries >> *_hash_shift)) { + numentries = 1UL << *_hash_shift; + BUG_ON(!numentries); + } + } else if (unlikely((numentries * bucketsize) < PAGE_SIZE)) numentries = PAGE_SIZE / bucketsize; } numentries = roundup_pow_of_two(numentries); -- cgit v1.2.3 From f86296317434b21585e229f6c49a33cb9ebab4d3 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 21 Sep 2009 17:03:11 -0700 Subject: mm: do batched scans for mem_cgroup For mem_cgroup, shrink_zone() may call shrink_list() with nr_to_scan=1, in which case shrink_list() _still_ calls isolate_pages() with the much larger SWAP_CLUSTER_MAX. It effectively scales up the inactive list scan rate by up to 32 times. For example, with 16k inactive pages and DEF_PRIORITY=12, (16k >> 12)=4. So when shrink_zone() expects to scan 4 pages in the active/inactive list, the active list will be scanned 4 pages, while the inactive list will be (over) scanned SWAP_CLUSTER_MAX=32 pages in effect. And that could break the balance between the two lists. 
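The fix (in the diff below) is to route the memcg case through the same batching helper already used for the global LRU, which accumulates small scan requests per list and only releases them once they add up to swap_cluster_max. A rough sketch of that helper as it existed in mm/vmscan.c at the time (simplified here for orientation; it is not part of this patch):

    static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
                                           unsigned long *nr_saved_scan,
                                           unsigned long swap_cluster_max)
    {
            unsigned long nr;

            /* accumulate the small request into the saved counter */
            *nr_saved_scan += nr_to_scan;
            nr = *nr_saved_scan;

            if (nr >= swap_cluster_max)
                    *nr_saved_scan = 0;     /* batch is big enough: release it */
            else
                    nr = 0;                 /* still too small: scan nothing yet */

            return nr;
    }
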
It can further impact the scan of the anon active list, due to the anon active/inactive ratio rebalance logic in balance_pgdat()/shrink_zone():

    inactive anon list over scanned => inactive_anon_is_low() == TRUE
                                    => shrink_active_list()
                                    => active anon list over scanned

So the end result may be:

    - anon inactive  => over scanned
    - anon active    => over scanned (maybe not as much)
    - file inactive  => over scanned
    - file active    => under scanned (relatively)

The accesses to nr_saved_scan are not lock protected and so not 100% accurate, however we can tolerate small errors and the resulting small imbalance in scan rates between zones. Cc: Rik van Riel Reviewed-by: KOSAKI Motohiro Acked-by: Balbir Singh Reviewed-by: Minchan Kim Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 +++++- mm/page_alloc.c | 2 +- mm/vmscan.c | 20 +++++++++++--------- 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9c50309b30a1..c188ea624c74 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -273,6 +273,11 @@ struct zone_reclaim_stat { */ unsigned long recent_rotated[2]; unsigned long recent_scanned[2]; + + /* + * accumulated for batching + */ + unsigned long nr_saved_scan[NR_LRU_LISTS]; }; struct zone { @@ -327,7 +332,6 @@ struct zone { spinlock_t lru_lock; struct zone_lru { struct list_head list; - unsigned long nr_saved_scan; /* accumulated for batching */ } lru[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 770f011e1c12..84d9da1e8f4c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3809,7 +3809,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, zone_pcp_init(zone); for_each_lru(l) { INIT_LIST_HEAD(&zone->lru[l].list); - zone->lru[l].nr_saved_scan = 0; + zone->reclaim_stat.nr_saved_scan[l] = 0; } zone->reclaim_stat.recent_rotated[0] = 0; zone->reclaim_stat.recent_rotated[1] = 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index 5432c230c4cb..0e7f5e4a22d7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1586,6 +1586,7 @@ static void shrink_zone(int priority, struct zone *zone, enum lru_list l; unsigned long nr_reclaimed = sc->nr_reclaimed; unsigned long swap_cluster_max = sc->swap_cluster_max; + struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); int noswap = 0; /* If we have no swap space, do not bother scanning anon pages. 
*/ @@ -1605,12 +1606,9 @@ static void shrink_zone(int priority, struct zone *zone, scan >>= priority; scan = (scan * percent[file]) / 100; } - if (scanning_global_lru(sc)) - nr[l] = nr_scan_try_batch(scan, - &zone->lru[l].nr_saved_scan, - swap_cluster_max); - else - nr[l] = scan; + nr[l] = nr_scan_try_batch(scan, + &reclaim_stat->nr_saved_scan[l], + swap_cluster_max); } while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -2220,6 +2218,7 @@ static void shrink_all_zones(unsigned long nr_pages, int prio, { struct zone *zone; unsigned long nr_reclaimed = 0; + struct zone_reclaim_stat *reclaim_stat; for_each_populated_zone(zone) { enum lru_list l; @@ -2236,11 +2235,14 @@ static void shrink_all_zones(unsigned long nr_pages, int prio, l == LRU_ACTIVE_FILE)) continue; - zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1; - if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) { + reclaim_stat = get_reclaim_stat(zone, sc); + reclaim_stat->nr_saved_scan[l] += + (lru_pages >> prio) + 1; + if (reclaim_stat->nr_saved_scan[l] + >= nr_pages || pass > 3) { unsigned long nr_to_scan; - zone->lru[l].nr_saved_scan = 0; + reclaim_stat->nr_saved_scan[l] = 0; nr_to_scan = min(nr_pages, lru_pages); nr_reclaimed += shrink_list(l, nr_to_scan, zone, sc, prio); -- cgit v1.2.3 From 5f8dcc21211a3d4e3a7a5ca366b469fb88117f61 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:03:19 -0700 Subject: page-allocator: split per-cpu list into one-list-per-migrate-type The following two patches remove searching in the page allocator fast-path by maintaining multiple free-lists in the per-cpu structure. At the time the search was introduced, increasing the per-cpu structures would waste a lot of memory as per-cpu structures were statically allocated at compile-time. This is no longer the case. The patches are as follows. They are based on mmotm-2009-08-27. Patch 1 adds multiple lists to struct per_cpu_pages, one per migratetype that can be stored on the PCP lists. Patch 2 notes that the pcpu drain path checks empty lists multiple times. The patch reduces the number of checks by maintaining a count of free lists encountered. Lists containing pages will then free multiple pages in batch. The patches were tested with kernbench, netperf udp/tcp, hackbench and sysbench. The netperf tests were not bound to any CPU in particular and were run such that, with 99% confidence, the reported results are within 1% of the estimated mean. sysbench was run with a postgres background and read-only tests. Similar to netperf, it was run multiple times so that, with 99% confidence, its results are within 1%. 
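To make the effect of patch 1 concrete, here is a condensed before/after view of the order-0 allocation fast path in buffered_rmqueue(), simplified from the full diff later in this patch:

    /* before: linear search of one mixed list for a matching migratetype */
    list_for_each_entry(page, &pcp->list, lru)
            if (page_private(page) == migratetype)
                    break;

    /* after: index the per-migratetype list directly, no search */
    list = &pcp->lists[migratetype];
    page = list_entry(list->next, struct page, lru);
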
The patches were tested on x86, x86-64 and ppc64 as follows:

x86: Intel Pentium D 3GHz with 8G RAM (no-brand machine)
    kernbench   - No significant difference, variance well within noise
    netperf-udp - 1.34% to 2.28% gain
    netperf-tcp - 0.45% to 1.22% gain
    hackbench   - Small variances, very close to noise
    sysbench    - Very small gains

x86-64: AMD Phenom 9950 1.3GHz with 8G RAM (no-brand machine)
    kernbench   - No significant difference, variance well within noise
    netperf-udp - 1.83% to 10.42% gains
    netperf-tcp - Not conclusive until buffer >= PAGE_SIZE
                    4096  +15.83%
                    8192  + 0.34% (not significant)
                    16384 + 1%
    hackbench   - Small gains, very close to noise
    sysbench    - 0.79% to 1.6% gain

ppc64: PPC970MP 2.5GHz with 10GB RAM (it's a terrasoft powerstation)
    kernbench   - No significant difference, variance well within noise
    netperf-udp - 2-3% gain for almost all buffer sizes tested
    netperf-tcp - losses on small buffers, gains on larger buffers,
                  possibly indicating some bad caching effect
    hackbench   - No significant difference
    sysbench    - 2-4% gain

This patch: Currently the per-cpu page allocator searches the PCP list for pages of the correct migrate-type to reduce the possibility of pages being inappropriately placed from a fragmentation perspective. This search is potentially expensive in a fast-path and undesirable. Splitting the per-cpu list into multiple lists increases the size of a per-cpu structure and this was potentially a major problem at the time the search was introduced. This problem has been mitigated as now only the necessary number of structures is allocated for the running system. This patch replaces a list search in the per-cpu allocator with one list per migrate type. The potential snag with this approach is when bulk freeing pages. We round-robin free pages based on migrate type which has little bearing on the cache hotness of the page and potentially checks empty lists repeatedly in the event the majority of PCP pages are of one type. Signed-off-by: Mel Gorman Acked-by: Nick Piggin Cc: Christoph Lameter Cc: Minchan Kim Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 ++- mm/page_alloc.c | 106 +++++++++++++++++++++++++++---------------------- 2 files changed, 63 insertions(+), 48 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c188ea624c74..652ef01be582 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -38,6 +38,7 @@ #define MIGRATE_UNMOVABLE 0 #define MIGRATE_RECLAIMABLE 1 #define MIGRATE_MOVABLE 2 +#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */ #define MIGRATE_RESERVE 3 #define MIGRATE_ISOLATE 4 /* can't allocate from here */ #define MIGRATE_TYPES 5 @@ -169,7 +170,9 @@ struct per_cpu_pages { int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ int batch; /* chunk size for buddy add/remove */ - struct list_head list; /* the list of pages */ + + /* Lists of pages, one per migrate type stored on the pcp-lists */ + struct list_head lists[MIGRATE_PCPTYPES]; }; struct per_cpu_pageset { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 84d9da1e8f4c..1b1c39e6a9b8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -511,7 +511,7 @@ static inline int free_pages_check(struct page *page) } /* - * Frees a list of pages. + * Frees a number of pages from the PCP lists * Assumes all pages on list are in same zone, and of same order. * count is the number of pages to free. 
* @@ -521,23 +521,36 @@ static inline int free_pages_check(struct page *page) * And clear the zone's pages_scanned counter, to hold off the "all pages are * pinned" detection logic. */ -static void free_pages_bulk(struct zone *zone, int count, - struct list_head *list, int order) +static void free_pcppages_bulk(struct zone *zone, int count, + struct per_cpu_pages *pcp) { + int migratetype = 0; + spin_lock(&zone->lock); zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; - __mod_zone_page_state(zone, NR_FREE_PAGES, count << order); + __mod_zone_page_state(zone, NR_FREE_PAGES, count); while (count--) { struct page *page; + struct list_head *list; + + /* + * Remove pages from lists in a round-robin fashion. This spinning + * around potentially empty lists is bloody awful, alternatives that + * don't suck are welcome + */ + do { + if (++migratetype == MIGRATE_PCPTYPES) + migratetype = 0; + list = &pcp->lists[migratetype]; + } while (list_empty(list)); - VM_BUG_ON(list_empty(list)); page = list_entry(list->prev, struct page, lru); /* have to delete it as __free_one_page list manipulates */ list_del(&page->lru); - trace_mm_page_pcpu_drain(page, order, page_private(page)); - __free_one_page(page, zone, order, page_private(page)); + trace_mm_page_pcpu_drain(page, 0, migratetype); + __free_one_page(page, zone, 0, migratetype); } spin_unlock(&zone->lock); } @@ -953,7 +966,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) to_drain = pcp->batch; else to_drain = pcp->count; - free_pages_bulk(zone, to_drain, &pcp->list, 0); + free_pcppages_bulk(zone, to_drain, pcp); pcp->count -= to_drain; local_irq_restore(flags); } @@ -979,7 +992,7 @@ static void drain_pages(unsigned int cpu) pcp = &pset->pcp; local_irq_save(flags); - free_pages_bulk(zone, pcp->count, &pcp->list, 0); + free_pcppages_bulk(zone, pcp->count, pcp); pcp->count = 0; local_irq_restore(flags); } @@ -1045,6 +1058,7 @@ static void free_hot_cold_page(struct page *page, int cold) struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; unsigned long flags; + int migratetype; int wasMlocked = __TestClearPageMlocked(page); kmemcheck_free_shadow(page, 0); @@ -1062,21 +1076,39 @@ static void free_hot_cold_page(struct page *page, int cold) kernel_map_pages(page, 1, 0); pcp = &zone_pcp(zone, get_cpu())->pcp; - set_page_private(page, get_pageblock_migratetype(page)); + migratetype = get_pageblock_migratetype(page); + set_page_private(page, migratetype); local_irq_save(flags); if (unlikely(wasMlocked)) free_page_mlock(page); __count_vm_event(PGFREE); + /* + * We only track unmovable, reclaimable and movable on pcp lists. + * Free ISOLATE pages back to the allocator because they are being + * offlined but treat RESERVE as movable pages so we can get those + * areas back if necessary. 
Otherwise, we may have to free + * excessively into the page allocator + */ + if (migratetype >= MIGRATE_PCPTYPES) { + if (unlikely(migratetype == MIGRATE_ISOLATE)) { + free_one_page(zone, page, 0, migratetype); + goto out; + } + migratetype = MIGRATE_MOVABLE; + } + if (cold) - list_add_tail(&page->lru, &pcp->list); + list_add_tail(&page->lru, &pcp->lists[migratetype]); else - list_add(&page->lru, &pcp->list); + list_add(&page->lru, &pcp->lists[migratetype]); pcp->count++; if (pcp->count >= pcp->high) { - free_pages_bulk(zone, pcp->batch, &pcp->list, 0); + free_pcppages_bulk(zone, pcp->batch, pcp); pcp->count -= pcp->batch; } + +out: local_irq_restore(flags); put_cpu(); } @@ -1134,46 +1166,24 @@ again: cpu = get_cpu(); if (likely(order == 0)) { struct per_cpu_pages *pcp; + struct list_head *list; pcp = &zone_pcp(zone, cpu)->pcp; + list = &pcp->lists[migratetype]; local_irq_save(flags); - if (!pcp->count) { - pcp->count = rmqueue_bulk(zone, 0, - pcp->batch, &pcp->list, - migratetype, cold); - if (unlikely(!pcp->count)) - goto failed; - } - - /* Find a page of the appropriate migrate type */ - if (cold) { - list_for_each_entry_reverse(page, &pcp->list, lru) - if (page_private(page) == migratetype) - break; - } else { - list_for_each_entry(page, &pcp->list, lru) - if (page_private(page) == migratetype) - break; - } - - /* Allocate more to the pcp list if necessary */ - if (unlikely(&page->lru == &pcp->list)) { - int get_one_page = 0; - + if (list_empty(list)) { pcp->count += rmqueue_bulk(zone, 0, - pcp->batch, &pcp->list, + pcp->batch, list, migratetype, cold); - list_for_each_entry(page, &pcp->list, lru) { - if (get_pageblock_migratetype(page) != - MIGRATE_ISOLATE) { - get_one_page = 1; - break; - } - } - if (!get_one_page) + if (unlikely(list_empty(list))) goto failed; } + if (cold) + page = list_entry(list->prev, struct page, lru); + else + page = list_entry(list->next, struct page, lru); + list_del(&page->lru); pcp->count--; } else { @@ -3024,6 +3034,7 @@ static int zone_batchsize(struct zone *zone) static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) { struct per_cpu_pages *pcp; + int migratetype; memset(p, 0, sizeof(*p)); @@ -3031,7 +3042,8 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) pcp->count = 0; pcp->high = 6 * batch; pcp->batch = max(1UL, 1 * batch); - INIT_LIST_HEAD(&pcp->list); + for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++) + INIT_LIST_HEAD(&pcp->lists[migratetype]); } /* @@ -3223,7 +3235,7 @@ static int __zone_pcp_update(void *data) pcp = &pset->pcp; local_irq_save(flags); - free_pages_bulk(zone, pcp->count, &pcp->list, 0); + free_pcppages_bulk(zone, pcp->count, pcp); setup_pageset(pset, batch); local_irq_restore(flags); } -- cgit v1.2.3 From a6f9edd65beaef24836e8934c8912c1e974dd45c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:03:20 -0700 Subject: page-allocator: maintain rolling count of pages to free from the PCP When round-robin freeing pages from the PCP lists, empty lists may be encountered. In the event one of the lists has more pages than another, there may be numerous checks for list_empty() which is undesirable. This patch maintains a count of pages to free which is incremented when empty lists are encountered. The intention is that more pages will then be freed from fuller lists than the empty ones reducing the number of empty list checks in the free path. 
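As a worked illustration (hypothetical PCP state, not taken from the patch): suppose count = 6 and only the MOVABLE list holds pages, the other two being empty.

    Round 1: the selection loop steps past one empty list before reaching
             MOVABLE, so batch_free reaches 2 and two pages are freed.
    Round 2: both empty lists are stepped past, batch_free reaches 3 and
             three pages are freed.
    Round 3: the single remaining page is freed.

Six pages are drained in three passes of the round-robin selection instead of six, roughly halving the number of list_empty() checks.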
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mel Gorman Cc: Nick Piggin Cc: Christoph Lameter Reviewed-by: Minchan Kim Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1b1c39e6a9b8..6877e22e3aa1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -525,32 +525,38 @@ static void free_pcppages_bulk(struct zone *zone, int count, struct per_cpu_pages *pcp) { int migratetype = 0; + int batch_free = 0; spin_lock(&zone->lock); zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; __mod_zone_page_state(zone, NR_FREE_PAGES, count); - while (count--) { + while (count) { struct page *page; struct list_head *list; /* - * Remove pages from lists in a round-robin fashion. This spinning - * around potentially empty lists is bloody awful, alternatives that - * don't suck are welcome + * Remove pages from lists in a round-robin fashion. A + * batch_free count is maintained that is incremented when an + * empty list is encountered. This is so more pages are freed + * off fuller lists instead of spinning excessively around empty + * lists */ do { + batch_free++; if (++migratetype == MIGRATE_PCPTYPES) migratetype = 0; list = &pcp->lists[migratetype]; } while (list_empty(list)); - page = list_entry(list->prev, struct page, lru); - /* have to delete it as __free_one_page list manipulates */ - list_del(&page->lru); - trace_mm_page_pcpu_drain(page, 0, migratetype); - __free_one_page(page, zone, 0, migratetype); + do { + page = list_entry(list->prev, struct page, lru); + /* must delete as __free_one_page list manipulates */ + list_del(&page->lru); + __free_one_page(page, zone, 0, migratetype); + trace_mm_page_pcpu_drain(page, 0, migratetype); + } while (--count && --batch_free && !list_empty(list)); } spin_unlock(&zone->lock); } -- cgit v1.2.3 From 03f6462a3ae78f36eb1f0ee8b4d5ae2f7859c1d5 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Sep 2009 17:03:35 -0700 Subject: mm: move highest_memmap_pfn Move highest_memmap_pfn __read_mostly from page_alloc.c next to zero_pfn __read_mostly in memory.c: to help them share a cacheline, since they're very often tested together in vm_normal_page(). 
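For context, the pair of tests in question looks roughly like this in vm_normal_page() (a simplified sketch; the VM_PFNMAP/VM_MIXEDMAP handling around it is omitted):

    unsigned long pfn = pte_pfn(pte);

    if (pte_special(pte)) {
            if (pfn != zero_pfn)                    /* load #1 */
                    print_bad_pte(vma, addr, pte, NULL);
            return NULL;
    }
    if (unlikely(pfn > highest_memmap_pfn)) {       /* load #2 */
            print_bad_pte(vma, addr, pte, NULL);
            return NULL;
    }

With both variables __read_mostly and defined adjacently in mm/memory.c, the two loads have a good chance of hitting the same cacheline.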
Signed-off-by: Hugh Dickins Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Mel Gorman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 3 ++- mm/memory.c | 1 + mm/page_alloc.c | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/internal.h b/mm/internal.h index 75596574911e..22ec8d2b0fb8 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -37,6 +37,8 @@ static inline void __put_page(struct page *page) atomic_dec(&page->_count); } +extern unsigned long highest_memmap_pfn; + /* * in mm/vmscan.c: */ @@ -46,7 +48,6 @@ extern void putback_lru_page(struct page *page); /* * in mm/page_alloc.c */ -extern unsigned long highest_memmap_pfn; extern void __free_pages_bootmem(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned long order); diff --git a/mm/memory.c b/mm/memory.c index 9bdbd10cb418..b1443ac07c00 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -109,6 +109,7 @@ static int __init disable_randmaps(char *s) __setup("norandmaps", disable_randmaps); unsigned long zero_pfn __read_mostly; +unsigned long highest_memmap_pfn __read_mostly; /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6877e22e3aa1..5717f27a0704 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -72,7 +72,6 @@ EXPORT_SYMBOL(node_states); unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; -unsigned long highest_memmap_pfn __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; -- cgit v1.2.3 From 8d65af789f3e2cf4cfbdbf71a0f7a61ebcd41d38 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 23 Sep 2009 15:57:19 -0700 Subject: sysctl: remove "struct file *" argument of ->proc_handler It's unused. It isn't needed -- read or write flag is already passed and sysctl shouldn't care about the rest. It _was_ used in two places at arch/frv for some reason. Signed-off-by: Alexey Dobriyan Cc: David Howells Cc: "Eric W. Biederman" Cc: Al Viro Cc: Ralf Baechle Cc: Martin Schwidefsky Cc: Ingo Molnar Cc: "David S. 
Miller" Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/pm.c | 14 +++---- arch/mips/lasat/sysctl.c | 18 ++++----- arch/s390/appldata/appldata_base.c | 9 ++--- arch/s390/kernel/debug.c | 4 +- arch/s390/mm/cmm.c | 4 +- arch/x86/include/asm/nmi.h | 3 +- arch/x86/kernel/apic/nmi.c | 4 +- arch/x86/kernel/vsyscall_64.c | 10 +---- drivers/cdrom/cdrom.c | 8 ++-- drivers/char/random.c | 4 +- drivers/net/wireless/arlan-proc.c | 28 +++++++------- drivers/parport/procfs.c | 12 +++--- fs/coda/coda_int.h | 1 + fs/drop_caches.c | 4 +- fs/file_table.c | 6 +-- fs/proc/proc_sysctl.c | 2 +- fs/xfs/linux-2.6/xfs_sysctl.c | 3 +- include/linux/fs.h | 2 +- include/linux/ftrace.h | 4 +- include/linux/hugetlb.h | 6 +-- include/linux/mm.h | 2 +- include/linux/mmzone.h | 13 +++---- include/linux/sched.h | 8 ++-- include/linux/security.h | 2 +- include/linux/swap.h | 2 +- include/linux/sysctl.h | 19 +++++----- include/linux/writeback.h | 11 +++--- include/net/ip.h | 2 +- include/net/ndisc.h | 2 - ipc/ipc_sysctl.c | 16 ++++---- ipc/mq_sysctl.c | 8 ++-- kernel/hung_task.c | 4 +- kernel/sched.c | 4 +- kernel/sched_fair.c | 4 +- kernel/slow-work.c | 12 +++--- kernel/softlockup.c | 4 +- kernel/sysctl.c | 78 ++++++++++++++++---------------------- kernel/trace/ftrace.c | 4 +- kernel/trace/trace_stack.c | 4 +- kernel/utsname_sysctl.c | 4 +- mm/hugetlb.c | 12 +++--- mm/page-writeback.c | 20 +++++----- mm/page_alloc.c | 24 ++++++------ mm/vmscan.c | 4 +- net/bridge/br_netfilter.c | 4 +- net/decnet/dn_dev.c | 5 +-- net/decnet/sysctl_net_decnet.c | 2 - net/ipv4/devinet.c | 12 +++--- net/ipv4/route.c | 7 ++-- net/ipv4/sysctl_net_ipv4.c | 16 ++++---- net/ipv6/addrconf.c | 8 ++-- net/ipv6/ndisc.c | 8 ++-- net/ipv6/route.c | 4 +- net/irda/irsysctl.c | 8 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++-- net/netfilter/nf_log.c | 4 +- net/phonet/sysctl.c | 4 +- net/sunrpc/sysctl.c | 4 +- net/sunrpc/xprtrdma/svc_rdma.c | 2 +- security/min_addr.c | 4 +- 60 files changed, 239 insertions(+), 270 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index be722fc1acff..0d4d3e3a4cfc 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c @@ -150,7 +150,7 @@ static int user_atoi(char __user *ubuf, size_t len) /* * Send us to sleep. 
*/ -static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp, +static int sysctl_pm_do_suspend(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int retval, mode; @@ -198,13 +198,13 @@ static int try_set_cmode(int new_cmode) } -static int cmode_procctl(ctl_table *ctl, int write, struct file *filp, +static int cmode_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_cmode; if (!write) - return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); + return proc_dointvec(ctl, write, buffer, lenp, fpos); new_cmode = user_atoi(buffer, *lenp); @@ -301,13 +301,13 @@ static int try_set_cm(int new_cm) return 0; } -static int p0_procctl(ctl_table *ctl, int write, struct file *filp, +static int p0_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_p0; if (!write) - return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); + return proc_dointvec(ctl, write, buffer, lenp, fpos); new_p0 = user_atoi(buffer, *lenp); @@ -345,13 +345,13 @@ static int p0_sysctl(ctl_table *table, return 1; } -static int cm_procctl(ctl_table *ctl, int write, struct file *filp, +static int cm_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_cm; if (!write) - return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); + return proc_dointvec(ctl, write, buffer, lenp, fpos); new_cm = user_atoi(buffer, *lenp); diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 3f04d4c406b7..b3deed8db619 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -56,12 +56,12 @@ int sysctl_lasatstring(ctl_table *table, /* And the same for proc */ -int proc_dolasatstring(ctl_table *table, int write, struct file *filp, +int proc_dolasatstring(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int r; - r = proc_dostring(table, write, filp, buffer, lenp, ppos); + r = proc_dostring(table, write, buffer, lenp, ppos); if ((!write) || r) return r; @@ -71,12 +71,12 @@ int proc_dolasatstring(ctl_table *table, int write, struct file *filp, } /* proc function to write EEPROM after changing int entry */ -int proc_dolasatint(ctl_table *table, int write, struct file *filp, +int proc_dolasatint(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int r; - r = proc_dointvec(table, write, filp, buffer, lenp, ppos); + r = proc_dointvec(table, write, buffer, lenp, ppos); if ((!write) || r) return r; @@ -89,7 +89,7 @@ int proc_dolasatint(ctl_table *table, int write, struct file *filp, static int rtctmp; /* proc function to read/write RealTime Clock */ -int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, +int proc_dolasatrtc(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct timespec ts; @@ -102,7 +102,7 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, if (rtctmp < 0) rtctmp = 0; } - r = proc_dointvec(table, write, filp, buffer, lenp, ppos); + r = proc_dointvec(table, write, buffer, lenp, ppos); if (r) return r; @@ -154,7 +154,7 @@ int sysctl_lasat_rtc(ctl_table *table, #endif #ifdef CONFIG_INET -int proc_lasat_ip(ctl_table *table, int write, struct file *filp, +int proc_lasat_ip(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned int ip; @@ -231,12 +231,12 @@ static int sysctl_lasat_prid(ctl_table *table, return 0; } -int proc_lasat_prid(ctl_table *table, int write, struct file *filp, +int proc_lasat_prid(ctl_table *table, 
int write, void *buffer, size_t *lenp, loff_t *ppos) { int r; - r = proc_dointvec(table, write, filp, buffer, lenp, ppos); + r = proc_dointvec(table, write, buffer, lenp, ppos); if (r < 0) return r; if (write) { diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 264528e4f58d..b55fd7ed1c31 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -50,10 +50,9 @@ static struct platform_device *appldata_pdev; * /proc entries (sysctl) */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; -static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, +static int appldata_timer_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); static int appldata_interval_handler(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -247,7 +246,7 @@ __appldata_vtimer_setup(int cmd) * Start/Stop timer, show status of timer (0 = not active, 1 = active) */ static int -appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, +appldata_timer_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int len; @@ -289,7 +288,7 @@ out: * current timer interval. */ static int -appldata_interval_handler(ctl_table *ctl, int write, struct file *filp, +appldata_interval_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int len, interval; @@ -335,7 +334,7 @@ out: * monitoring (0 = not in process, 1 = in process) */ static int -appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, +appldata_generic_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 4c512561687d..20f282c911c2 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -881,11 +881,11 @@ static int debug_active=1; * if debug_active is already off */ static int -s390dbf_procactive(ctl_table *table, int write, struct file *filp, +s390dbf_procactive(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) - return proc_dointvec(table, write, filp, buffer, lenp, ppos); + return proc_dointvec(table, write, buffer, lenp, ppos); else return 0; } diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 413c240cbca7..b201135cc18c 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -262,7 +262,7 @@ cmm_skip_blanks(char *cp, char **endp) static struct ctl_table cmm_table[]; static int -cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, +cmm_pages_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[16], *p; @@ -303,7 +303,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, } static int -cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, +cmm_timeout_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index e63cf7d441e1..139d4c1a33a7 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog; #define NMI_INVALID 3 struct ctl_table; -struct file; -extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, +extern int proc_nmi_enabled(struct ctl_table *, int , void __user 
*, size_t *, loff_t *); extern int unknown_nmi_panic; diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index cb66a22d98ad..7ff61d6a188a 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) /* * proc handler for /proc/sys/kernel/nmi */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, +int proc_nmi_enabled(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { int old_state; nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (!!old_state == !!nmi_watchdog_enabled) return 0; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index cf53a78e2dcf..8cb4974ff599 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void) } #ifdef CONFIG_SYSCTL - -static int -vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return proc_dointvec(ctl, write, filp, buffer, lenp, ppos); -} - static ctl_table kernel_table2[] = { { .procname = "vsyscall64", .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = vsyscall_sysctl_change }, + .proc_handler = proc_dointvec }, {} }; diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 71d1b9bab70b..614da5b8613a 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -3412,7 +3412,7 @@ static int cdrom_print_info(const char *header, int val, char *info, return 0; } -static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp, +static int cdrom_sysctl_info(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int pos; @@ -3489,7 +3489,7 @@ static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp, goto done; doit: mutex_unlock(&cdrom_mutex); - return proc_dostring(ctl, write, filp, buffer, lenp, ppos); + return proc_dostring(ctl, write, buffer, lenp, ppos); done: printk(KERN_INFO "cdrom: info buffer too small\n"); goto doit; @@ -3525,12 +3525,12 @@ static void cdrom_update_settings(void) mutex_unlock(&cdrom_mutex); } -static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp, +static int cdrom_sysctl_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) { diff --git a/drivers/char/random.c b/drivers/char/random.c index d8a9255e1a3f..04b505e5a5e2 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1231,7 +1231,7 @@ static char sysctl_bootid[16]; * as an ASCII string in the standard UUID format. If accesses via the * sysctl system call, it is returned as 16 bytes of binary data. 
*/ -static int proc_do_uuid(ctl_table *table, int write, struct file *filp, +static int proc_do_uuid(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { ctl_table fake_table; @@ -1254,7 +1254,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp, fake_table.data = buf; fake_table.maxlen = sizeof(buf); - return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos); + return proc_dostring(&fake_table, write, buffer, lenp, ppos); } static int uuid_strategy(ctl_table *table, diff --git a/drivers/net/wireless/arlan-proc.c b/drivers/net/wireless/arlan-proc.c index 2ab1d59870f4..a8b689635a3b 100644 --- a/drivers/net/wireless/arlan-proc.c +++ b/drivers/net/wireless/arlan-proc.c @@ -402,7 +402,7 @@ static int arlan_setup_card_by_book(struct net_device *dev) static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0"; -static int arlan_sysctl_info(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_info(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -629,7 +629,7 @@ final: *lenp = pos; if (!write) - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); else { *lenp = 0; @@ -639,7 +639,7 @@ final: } -static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_info161719(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -669,11 +669,11 @@ static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } -static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -698,11 +698,11 @@ static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp SARLBNpln(u_char, txBuffer, 0x800); final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } -static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -726,11 +726,11 @@ static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp SARLBNpln(u_char, rxBuffer, 0x800); final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } -static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_info18(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -756,7 +756,7 @@ static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp, final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } @@ -766,7 +766,7 @@ final: static char conf_reset_result[200]; -static int arlan_configure(ctl_table * ctl, int write, struct file *filp, +static int arlan_configure(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int pos = 0; @@ -788,10 +788,10 @@ static int arlan_configure(ctl_table * ctl, int write, struct file *filp, return 
-1; *lenp = pos; - return proc_dostring(ctl, write, filp, buffer, lenp, ppos); + return proc_dostring(ctl, write, buffer, lenp, ppos); } -static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_reset(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int pos = 0; @@ -811,7 +811,7 @@ static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp, } else return -1; *lenp = pos + 3; - return proc_dostring(ctl, write, filp, buffer, lenp, ppos); + return proc_dostring(ctl, write, buffer, lenp, ppos); } diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c index 554e11f9e1ce..8eefe56f1cbe 100644 --- a/drivers/parport/procfs.c +++ b/drivers/parport/procfs.c @@ -31,7 +31,7 @@ #define PARPORT_MIN_SPINTIME_VALUE 1 #define PARPORT_MAX_SPINTIME_VALUE 1000 -static int do_active_device(ctl_table *table, int write, struct file *filp, +static int do_active_device(ctl_table *table, int write, void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -68,7 +68,7 @@ static int do_active_device(ctl_table *table, int write, struct file *filp, } #ifdef CONFIG_PARPORT_1284 -static int do_autoprobe(ctl_table *table, int write, struct file *filp, +static int do_autoprobe(ctl_table *table, int write, void __user *result, size_t *lenp, loff_t *ppos) { struct parport_device_info *info = table->extra2; @@ -111,7 +111,7 @@ static int do_autoprobe(ctl_table *table, int write, struct file *filp, #endif /* IEEE1284.3 support. */ static int do_hardware_base_addr (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -139,7 +139,7 @@ static int do_hardware_base_addr (ctl_table *table, int write, } static int do_hardware_irq (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -167,7 +167,7 @@ static int do_hardware_irq (ctl_table *table, int write, } static int do_hardware_dma (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -195,7 +195,7 @@ static int do_hardware_dma (ctl_table *table, int write, } static int do_hardware_modes (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index 8ccd5ed81d9c..d99860a33890 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h @@ -2,6 +2,7 @@ #define _CODA_INT_ struct dentry; +struct file; extern struct file_system_type coda_fs_type; extern unsigned long coda_timeout; diff --git a/fs/drop_caches.c b/fs/drop_caches.c index a2edb7913447..31f4b0e6d72c 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -63,9 +63,9 @@ static void drop_slab(void) } int drop_caches_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_minmax(table, write, file, buffer, length, ppos); + proc_dointvec_minmax(table, write, buffer, length, ppos); if (write) { if (sysctl_drop_caches & 1) drop_pagecache(); diff --git a/fs/file_table.c b/fs/file_table.c index 
334ce39881f8..8eb44042e009 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files); * Handle nr_files sysctl */ #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) -int proc_nr_files(ctl_table *table, int write, struct file *filp, +int proc_nr_files(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); - return proc_dointvec(table, write, filp, buffer, lenp, ppos); + return proc_dointvec(table, write, buffer, lenp, ppos); } #else -int proc_nr_files(ctl_table *table, int write, struct file *filp, +int proc_nr_files(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9b1e4e9a16bf..f667e8aeabdf 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, /* careful: calling conventions are nasty here */ res = count; - error = table->proc_handler(table, write, filp, buf, &res, ppos); + error = table->proc_handler(table, write, buf, &res, ppos); if (!error) error = res; out: diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 916c0ffb6083..c5bc67c4e3bb 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -26,7 +26,6 @@ STATIC int xfs_stats_clear_proc_handler( ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler( int c, ret, *valp = ctl->data; __uint32_t vn_active; - ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); if (!ret && write && *valp) { printk("XFS Clearing xfsstats\n"); diff --git a/include/linux/fs.h b/include/linux/fs.h index 51803528b095..33ed6644abd0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2467,7 +2467,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; -int proc_nr_files(struct ctl_table *table, int write, struct file *filp, +int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3c0924a18daf..cd3d2abaf30a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -19,7 +19,7 @@ extern int ftrace_enabled; extern int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); @@ -94,7 +94,7 @@ static inline void ftrace_start(void) { } extern int stack_tracer_enabled; int stack_trace_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 176e7ee73eff..11ab19ac6b3d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) } void reset_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void 
__user *, size_t *, loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, diff --git a/include/linux/mm.h b/include/linux/mm.h index b6eae5e3144b..87218ae84e36 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1279,7 +1279,7 @@ int in_gate_area_no_task(unsigned long addr); #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ -int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, +int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 652ef01be582..6f7561730d88 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; -struct file; -int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, +int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int numa_zonelist_order_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ diff --git a/include/linux/sched.h b/include/linux/sched.h index e951bd2eb9fc..811cd96524d7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -309,7 +309,7 @@ extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; @@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user 
*buffer, size_t *lenp, loff_t *ppos); #endif @@ -1906,7 +1906,7 @@ extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_timer_migration; int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, + void __user *buffer, size_t *length, loff_t *ppos); #endif #ifdef CONFIG_SCHED_DEBUG @@ -1924,7 +1924,7 @@ extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; int sched_rt_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int sysctl_sched_compat_yield; diff --git a/include/linux/security.h b/include/linux/security.h index d050b66ab9ef..239e40d0450b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) return PAGE_ALIGN(mmap_min_addr); return hint; } -extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, +extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #ifdef CONFIG_SECURITY diff --git a/include/linux/swap.h b/include/linux/swap.h index 4c78fea989b9..82232dbea3f7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -245,7 +245,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma); extern void scan_mapping_unevictable_pages(struct address_space *); extern unsigned long scan_unevictable_pages; -extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, +extern int scan_unevictable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int scan_unevictable_register_node(struct node *node); extern void scan_unevictable_unregister_node(struct node *node); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e76d3b22a466..1e4743ee6831 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -29,7 +29,6 @@ #include #include -struct file; struct completion; #define CTL_MAXNAME 10 /* how many path components do we allow in a @@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp, +typedef int proc_handler (struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -extern int proc_dostring(struct ctl_table *, int, struct file *, +extern int proc_dostring(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec(struct ctl_table *, int, struct file *, +extern int proc_dointvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int 
proc_doulongvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_doulongvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int do_sysctl (int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 75cf58666ff9..66ebddcff664 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -110,21 +110,20 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); extern int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); struct ctl_table; -struct file; -int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, +int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, diff --git a/include/net/ip.h b/include/net/ip.h index 72c36926c26d..5b26a0bd178e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -399,7 +399,7 @@ extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, * fed into the routing cache should use these handlers. 
*/ int ipv4_doint_and_flush(ctl_table *ctl, int write, - struct file* filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); int ipv4_doint_and_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 1459ed3e2697..f76f22d05721 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -55,7 +55,6 @@ enum { #include struct ctl_table; -struct file; struct inet6_dev; struct net_device; struct net_proto_family; @@ -139,7 +138,6 @@ extern int igmp6_event_report(struct sk_buff *skb); #ifdef CONFIG_SYSCTL extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, - struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 40eab7314aeb..7d3704750efc 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -27,18 +27,18 @@ static void *get_ipc(ctl_table *table) } #ifdef CONFIG_PROC_SYSCTL -static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, +static int proc_ipc_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); + return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); } static int proc_ipc_callback_dointvec(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; size_t lenp_bef = *lenp; @@ -47,7 +47,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) /* @@ -61,13 +61,13 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, } static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - return proc_doulongvec_minmax(&ipc_table, write, filp, buffer, + return proc_doulongvec_minmax(&ipc_table, write, buffer, lenp, ppos); } @@ -95,7 +95,7 @@ static void ipc_auto_callback(int val) } static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; size_t lenp_bef = *lenp; @@ -106,7 +106,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, ipc_table.data = get_ipc(table); oldval = *((int *)(ipc_table.data)); - rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) { int newval = *((int *)(ipc_table.data)); diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 24ae46dfe45d..8a058711fc10 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -31,24 +31,24 @@ static void *get_mq(ctl_table *table) return which; } -static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp, +static int proc_mq_dointvec(ctl_table *table, int write, void __user 
*buffer, size_t *lenp, loff_t *ppos) { struct ctl_table mq_table; memcpy(&mq_table, table, sizeof(mq_table)); mq_table.data = get_mq(table); - return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos); + return proc_dointvec(&mq_table, write, buffer, lenp, ppos); } static int proc_mq_dointvec_minmax(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table mq_table; memcpy(&mq_table, table, sizeof(mq_table)); mq_table.data = get_mq(table); - return proc_dointvec_minmax(&mq_table, write, filp, buffer, + return proc_dointvec_minmax(&mq_table, write, buffer, lenp, ppos); } #else diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 022a4927b785..d4e841747400 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -171,12 +171,12 @@ static unsigned long timeout_jiffies(unsigned long timeout) * Process updating of timeout sysctl */ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) goto out; diff --git a/kernel/sched.c b/kernel/sched.c index 0d0361b9dbb3..ee61f454a98b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -10312,7 +10312,7 @@ static int sched_rt_global_constraints(void) #endif /* CONFIG_RT_GROUP_SCHED */ int sched_rt_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -10323,7 +10323,7 @@ int sched_rt_handler(struct ctl_table *table, int write, old_period = sysctl_sched_rt_period; old_runtime = sysctl_sched_rt_runtime; - ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (!ret && write) { ret = sched_rt_global_constraints(); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ecc637a0d591..4e777b47eeda 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) #ifdef CONFIG_SCHED_DEBUG int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) return ret; diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 09d7519557d3..0d31135efbf4 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -26,10 +26,10 @@ static void slow_work_cull_timeout(unsigned long); static void slow_work_oom_timeout(unsigned long); #ifdef CONFIG_SYSCTL -static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *, +static int slow_work_min_threads_sysctl(struct ctl_table *, int, void __user *, size_t *, loff_t *); -static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *, +static int slow_work_max_threads_sysctl(struct ctl_table *, int , void __user *, size_t *, loff_t *); #endif @@ -493,10 +493,10 @@ static void slow_work_oom_timeout(unsigned long data) * Handle adjustment of the minimum number of threads */ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, - struct file *filp, void 
__user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); int n; if (ret == 0) { @@ -521,10 +521,10 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, * Handle adjustment of the maximum number of threads */ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); int n; if (ret == 0) { diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 88796c330838..81324d12eb35 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -90,11 +90,11 @@ void touch_all_softlockup_watchdogs(void) EXPORT_SYMBOL(touch_all_softlockup_watchdogs); int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { touch_all_softlockup_watchdogs(); - return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 37abb8c3995b..a02697b7cb97 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -163,9 +163,9 @@ extern int max_lock_depth; #endif #ifdef CONFIG_PROC_SYSCTL -static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, +static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static int proc_taint(struct ctl_table *table, int write, struct file *filp, +static int proc_taint(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -2226,7 +2226,7 @@ void sysctl_head_put(struct ctl_table_header *head) #ifdef CONFIG_PROC_SYSCTL static int _proc_do_string(void* data, int maxlen, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { size_t len; @@ -2287,7 +2287,6 @@ static int _proc_do_string(void* data, int maxlen, int write, * proc_dostring - read a string sysctl * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2301,10 +2300,10 @@ static int _proc_do_string(void* data, int maxlen, int write, * * Returns 0 on success. 
*/ -int proc_dostring(struct ctl_table *table, int write, struct file *filp, +int proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return _proc_do_string(table->data, table->maxlen, write, filp, + return _proc_do_string(table->data, table->maxlen, write, buffer, lenp, ppos); } @@ -2329,7 +2328,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, } static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, - int write, struct file *filp, void __user *buffer, + int write, void __user *buffer, size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, int write, void *data), @@ -2436,13 +2435,13 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, #undef TMPBUFLEN } -static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp, +static int do_proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) { - return __do_proc_dointvec(table->data, table, write, filp, + return __do_proc_dointvec(table->data, table, write, buffer, lenp, ppos, conv, data); } @@ -2450,7 +2449,6 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil * proc_dointvec - read a vector of integers * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2460,10 +2458,10 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil * * Returns 0 on success. */ -int proc_dointvec(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + return do_proc_dointvec(table,write,buffer,lenp,ppos, NULL,NULL); } @@ -2471,7 +2469,7 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp, * Taint values can only be increased * This means we can safely use a temporary. */ -static int proc_taint(struct ctl_table *table, int write, struct file *filp, +static int proc_taint(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; @@ -2483,7 +2481,7 @@ static int proc_taint(struct ctl_table *table, int write, struct file *filp, t = *table; t.data = &tmptaint; - err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos); + err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); if (err < 0) return err; @@ -2535,7 +2533,6 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * proc_dointvec_minmax - read a vector of integers with min/max values * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2548,19 +2545,18 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success. 
*/ -int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_dointvec_minmax_conv_param param = { .min = (int *) table->extra1, .max = (int *) table->extra2, }; - return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, + return do_proc_dointvec(table, write, buffer, lenp, ppos, do_proc_dointvec_minmax_conv, &param); } static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, @@ -2665,21 +2661,19 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int } static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, unsigned long convdiv) { return __do_proc_doulongvec_minmax(table->data, table, write, - filp, buffer, lenp, ppos, convmul, convdiv); + buffer, lenp, ppos, convmul, convdiv); } /** * proc_doulongvec_minmax - read a vector of long integers with min/max values * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2692,17 +2686,16 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l); + return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l); } /** * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2717,11 +2710,10 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp * Returns 0 on success. */ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_doulongvec_minmax(table, write, filp, buffer, + return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, HZ, 1000l); } @@ -2797,7 +2789,6 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, * proc_dointvec_jiffies - read a vector of integers as seconds * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2809,10 +2800,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success.
*/ -int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + return do_proc_dointvec(table,write,buffer,lenp,ppos, do_proc_dointvec_jiffies_conv,NULL); } @@ -2820,7 +2811,6 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: pointer to the file position @@ -2832,10 +2822,10 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + return do_proc_dointvec(table,write,buffer,lenp,ppos, do_proc_dointvec_userhz_jiffies_conv,NULL); } @@ -2843,7 +2833,6 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2856,14 +2845,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file * * Returns 0 on success. */ -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, + return do_proc_dointvec(table, write, buffer, lenp, ppos, do_proc_dointvec_ms_jiffies_conv, NULL); } -static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, +static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct pid *new_pid; @@ -2872,7 +2861,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp tmp = pid_vnr(cad_pid); - r = __do_proc_dointvec(&tmp, table, write, filp, buffer, + r = __do_proc_dointvec(&tmp, table, write, buffer, lenp, ppos, NULL, NULL); if (r || !write) return r; @@ -2887,50 +2876,49 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp #else /* CONFIG_PROC_FS */ -int proc_dostring(struct ctl_table *table, int write, struct file *filp, +int proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, void __user *buffer, 
size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 23df7771c937..a142579765bf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3015,7 +3015,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -3025,7 +3025,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_lock(&ftrace_lock); - ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 0f6facb050a1..8504ac71e4e8 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -296,14 +296,14 @@ static const struct file_operations stack_trace_fops = { int stack_trace_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; mutex_lock(&stack_sysctl_mutex); - ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_stack_tracer_enabled == !!stack_tracer_enabled)) diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 92359cc747a7..69eae358a726 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -42,14 +42,14 @@ static void put_uts(ctl_table *table, int write, void *which) * Special case of dostring for the UTS structure. This has locks * to observe. Should this be in kernel/sys.c ???? 
*/ -static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, +static int proc_do_uts_string(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table uts_table; int r; memcpy(&uts_table, table, sizeof(uts_table)); uts_table.data = get_uts(table, write); - r = proc_dostring(&uts_table,write,filp,buffer,lenp, ppos); + r = proc_dostring(&uts_table,write,buffer,lenp, ppos); put_uts(table, write, uts_table.data); return r; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 815dbd4a6dcb..6f048fcc749c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1537,7 +1537,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array) #ifdef CONFIG_SYSCTL int hugetlb_sysctl_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; @@ -1548,7 +1548,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, table->data = &tmp; table->maxlen = sizeof(unsigned long); - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write) h->max_huge_pages = set_max_huge_pages(h, tmp); @@ -1557,10 +1557,10 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, } int hugetlb_treat_movable_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (hugepages_treat_as_movable) htlb_alloc_mask = GFP_HIGHUSER_MOVABLE; else @@ -1569,7 +1569,7 @@ int hugetlb_treat_movable_handler(struct ctl_table *table, int write, } int hugetlb_overcommit_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; @@ -1580,7 +1580,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, table->data = &tmp; table->maxlen = sizeof(unsigned long); - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write) { spin_lock(&hugetlb_lock); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5f378dd58802..be197f71b096 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -155,37 +155,37 @@ static void update_completion_period(void) } int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_bytes = 0; return ret; } int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_ratio = 0; return ret; } int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; int ret; - ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, 
ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { update_completion_period(); vm_dirty_bytes = 0; @@ -195,13 +195,13 @@ int dirty_ratio_handler(struct ctl_table *table, int write, int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { unsigned long old_bytes = vm_dirty_bytes; int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_bytes != old_bytes) { update_completion_period(); vm_dirty_ratio = 0; @@ -686,9 +686,9 @@ static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ int dirty_writeback_centisecs_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); return 0; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5717f27a0704..88248b3c20bb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2373,7 +2373,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order); * sysctl handler for numa_zonelist_order */ int numa_zonelist_order_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, + void __user *buffer, size_t *length, loff_t *ppos) { char saved_string[NUMA_ZONELIST_ORDER_LEN]; @@ -2382,7 +2382,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write, if (write) strncpy(saved_string, (char*)table->data, NUMA_ZONELIST_ORDER_LEN); - ret = proc_dostring(table, write, file, buffer, length, ppos); + ret = proc_dostring(table, write, buffer, length, ppos); if (ret) return ret; if (write) { @@ -4706,9 +4706,9 @@ module_init(init_per_zone_wmark_min) * changes. */ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (write) setup_per_zone_wmarks(); return 0; @@ -4716,12 +4716,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, #ifdef CONFIG_NUMA int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; int rc; - rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); if (rc) return rc; @@ -4732,12 +4732,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, } int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; int rc; - rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); if (rc) return rc; @@ -4758,9 +4758,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, * if in function of the boot time zone sizes. 
*/ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_minmax(table, write, file, buffer, length, ppos); + proc_dointvec_minmax(table, write, buffer, length, ppos); setup_per_zone_lowmem_reserve(); return 0; } @@ -4772,13 +4772,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, */ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; unsigned int cpu; int ret; - ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); if (!write || (ret == -EINVAL)) return ret; for_each_populated_zone(zone) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 2423782214ab..f444b7409085 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2844,10 +2844,10 @@ static void scan_all_zones_unevictable_pages(void) unsigned long scan_unevictable_pages; int scan_unevictable_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write && *(unsigned long *)table->data) scan_all_zones_unevictable_pages(); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 907a82e9023d..a16a2342f6bf 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -965,12 +965,12 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { #ifdef CONFIG_SYSCTL static -int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp, +int brnf_sysctl_call_tables(ctl_table * ctl, int write, void __user * buffer, size_t * lenp, loff_t * ppos) { int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *(int *)(ctl->data)) *(int *)(ctl->data) = 1; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 1c6a5bb6f0c8..6e1f085db06a 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -164,7 +164,7 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU static int min_priority[1]; static int max_priority[] = { 127 }; /* From DECnet spec */ -static int dn_forwarding_proc(ctl_table *, int, struct file *, +static int dn_forwarding_proc(ctl_table *, int, void __user *, size_t *, loff_t *); static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, @@ -274,7 +274,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) } static int dn_forwarding_proc(ctl_table *table, int write, - struct file *filep, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -290,7 +289,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, dn_db = dev->dn_ptr; old = dn_db->parms.forwarding; - err = proc_dointvec(table, write, filep, buffer, lenp, ppos); + err = proc_dointvec(table, write, buffer, lenp, ppos); if ((err >= 0) && write) { if (dn_db->parms.forwarding < 0) diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 5bcd592ae6dd..26b0ab1e9f56 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -165,7 +165,6 @@ static int dn_node_address_strategy(ctl_table *table, } 
static int dn_node_address_handler(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -276,7 +275,6 @@ static int dn_def_dev_strategy(ctl_table *table, static int dn_def_dev_handler(ctl_table *table, int write, - struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 07336c6201f0..e92f1fd28aa5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1270,10 +1270,10 @@ static void inet_forward_change(struct net *net) } static int devinet_conf_proc(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) { struct ipv4_devconf *cnf = ctl->extra1; @@ -1342,12 +1342,12 @@ static int devinet_conf_sysctl(ctl_table *table, } static int devinet_sysctl_forward(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *valp != val) { struct net *net = ctl->extra2; @@ -1372,12 +1372,12 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } int ipv4_doint_and_flush(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); struct net *net = ctl->extra2; if (write && *valp != val) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df9347314538..bb4199252026 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3036,7 +3036,7 @@ void ip_rt_multicast_event(struct in_device *in_dev) #ifdef CONFIG_SYSCTL static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { if (write) { @@ -3046,7 +3046,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, memcpy(&ctl, __ctl, sizeof(ctl)); ctl.data = &flush_delay; - proc_dointvec(&ctl, write, filp, buffer, lenp, ppos); + proc_dointvec(&ctl, write, buffer, lenp, ppos); net = (struct net *)__ctl->extra1; rt_cache_flush(net, flush_delay); @@ -3106,12 +3106,11 @@ static void rt_secret_reschedule(int old) } static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int old = ip_rt_secret_interval; - int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); rt_secret_reschedule(old); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4710d219f06a..2dcf04d9b005 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -36,7 +36,7 @@ static void set_local_port_range(int range[2]) } /* Validate changes from /proc interface. 
*/ -static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, +static int ipv4_local_port_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -51,7 +51,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, }; inet_get_local_port_range(range, range + 1); - ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { if (range[1] < range[0]) @@ -91,7 +91,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, } -static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp, +static int proc_tcp_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char val[TCP_CA_NAME_MAX]; @@ -103,7 +103,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * tcp_get_default_congestion_control(val); - ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) ret = tcp_set_default_congestion_control(val); return ret; @@ -129,7 +129,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, } static int proc_tcp_available_congestion_control(ctl_table *ctl, - int write, struct file * filp, + int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -140,13 +140,13 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl, if (!tbl.data) return -ENOMEM; tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX); - ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); kfree(tbl.data); return ret; } static int proc_allowed_congestion_control(ctl_table *ctl, - int write, struct file * filp, + int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -158,7 +158,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return -ENOMEM; tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); - ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 55f486d89c88..1fd0a3d775d2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3986,14 +3986,14 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) #ifdef CONFIG_SYSCTL static -int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, +int addrconf_sysctl_forward(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_fixup_forwarding(ctl, valp, val); @@ -4090,14 +4090,14 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) } static -int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp, +int addrconf_sysctl_disable(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_disable_ipv6(ctl, valp, val); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7015478797f6..498b9b0b0fad 100644 --- 
a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1735,7 +1735,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl, } } -int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) +int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; @@ -1746,16 +1746,16 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); if (strcmp(ctl->procname, "retrans_time") == 0) - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = proc_dointvec_jiffies(ctl, write, - filp, buffer, lenp, ppos); + buffer, lenp, ppos); else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) ret = proc_dointvec_ms_jiffies(ctl, write, - filp, buffer, lenp, ppos); + buffer, lenp, ppos); else ret = -1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 77aecbe8ff6c..d6fe7646a8ff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2524,13 +2524,13 @@ static const struct file_operations rt6_stats_seq_fops = { #ifdef CONFIG_SYSCTL static -int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, +int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; int delay = net->ipv6.sysctl.flush_delay; if (write) { - proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + proc_dointvec(ctl, write, buffer, lenp, ppos); fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); return 0; } else diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 57f8817c3979..5c86567e5a78 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -73,12 +73,12 @@ static int min_lap_keepalive_time = 100; /* 100us */ /* For other sysctl, I've no idea of the range. 
Maybe Dag could help * us on that - Jean II */ -static int do_devname(ctl_table *table, int write, struct file *filp, +static int do_devname(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dostring(table, write, filp, buffer, lenp, ppos); + ret = proc_dostring(table, write, buffer, lenp, ppos); if (ret == 0 && write) { struct ias_value *val; @@ -90,12 +90,12 @@ static int do_devname(ctl_table *table, int write, struct file *filp, } -static int do_discovery(ctl_table *table, int write, struct file *filp, +static int do_discovery(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret) return ret; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index fba2892b99e1..446e9bd4b4bc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1496,14 +1496,14 @@ static int ip_vs_zero_all(void) static int -proc_do_defense_mode(ctl_table *table, int write, struct file * filp, +proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; int val = *valp; int rc; - rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec(table, write, buffer, lenp, ppos); if (write && (*valp != val)) { if ((*valp < 0) || (*valp > 3)) { /* Restore the correct value */ @@ -1517,7 +1517,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp, static int -proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, +proc_do_sync_threshold(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1527,7 +1527,7 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, /* backup the value first */ memcpy(val, valp, sizeof(val)); - rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec(table, write, buffer, lenp, ppos); if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { /* Restore the correct value */ memcpy(valp, val, sizeof(val)); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 4e620305f28c..c93494fef8ef 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -226,7 +226,7 @@ static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; static struct ctl_table_header *nf_log_dir_header; -static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, +static int nf_log_proc_dostring(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; @@ -260,7 +260,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, table->data = "NONE"; else table->data = logger->name; - r = proc_dostring(table, write, filp, buffer, lenp, ppos); + r = proc_dostring(table, write, buffer, lenp, ppos); mutex_unlock(&nf_log_mutex); } diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 7b5749ee2765..2220f3322326 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -56,7 +56,7 @@ void phonet_get_local_port_range(int *min, int *max) } while (read_seqretry(&local_port_range_lock, seq)); } -static int proc_local_port_range(ctl_table *table, int write, struct file *filp, +static int proc_local_port_range(ctl_table *table, int write, void __user *buffer, 
size_t *lenp, loff_t *ppos) { @@ -70,7 +70,7 @@ static int proc_local_port_range(ctl_table *table, int write, struct file *filp, .extra2 = &local_port_range_max, }; - ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { if (range[1] < range[0]) diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 5231f7aaac0e..42f9748ae093 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -56,7 +56,7 @@ rpc_unregister_sysctl(void) } } -static int proc_do_xprt(ctl_table *table, int write, struct file *file, +static int proc_do_xprt(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; @@ -71,7 +71,7 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file, } static int -proc_dodebug(ctl_table *table, int write, struct file *file, +proc_dodebug(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[20], c, *s; diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 87101177825b..35fb68b9c8ec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -80,7 +80,7 @@ struct kmem_cache *svc_rdma_ctxt_cachep; * current value. */ static int read_reset_stat(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { atomic_t *stat = (atomic_t *)table->data; diff --git a/security/min_addr.c b/security/min_addr.c index 14cc7b3b8d03..c844eed7915d 100644 --- a/security/min_addr.c +++ b/security/min_addr.c @@ -28,12 +28,12 @@ static void update_mmap_min_addr(void) * sysctl handler which just sets dac_mmap_min_addr = the new value and then * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly */ -int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, +int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); update_mmap_min_addr(); -- cgit v1.2.3
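
The conversion pattern throughout this series is mechanical: the ->proc_handler callback loses its struct file * argument, and each handler drops the corresponding filp argument from the generic proc_dostring()/proc_dointvec*()/proc_doulongvec*() helpers it forwards to. As a rough sketch of what a custom handler looks like against the new signature -- example_value and example_dointvec are hypothetical names for illustration, not symbols touched by this patch:

	#include <linux/kernel.h>
	#include <linux/sysctl.h>

	static int example_value;	/* hypothetical sysctl-backed variable */

	/* New-style handler: no "struct file *filp" parameter anywhere. */
	static int example_dointvec(struct ctl_table *table, int write,
				    void __user *buffer, size_t *lenp, loff_t *ppos)
	{
		/* Delegate parsing to the generic helper -- also without filp. */
		int ret = proc_dointvec(table, write, buffer, lenp, ppos);

		if (!ret && write)
			printk(KERN_INFO "example_value is now %d\n", example_value);
		return ret;
	}

Such a handler would be wired up through a ctl_table entry whose .data points at example_value and whose .proc_handler is example_dointvec. Updating an out-of-tree handler against this series should therefore need no logic changes: remove the filp parameter from the prototype and delete the filp argument at every call into the proc_do*() helpers, exactly as the hunks above do.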