From 80119ef5c8153e0a6cc5edf00c083dc98a9bd348 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 23 May 2008 13:04:31 -0700 Subject: mm: fix atomic_t overflow in vm The atomic_t type is 32bit but a 64bit system can have more than 2^32 pages of virtual address space available. Without this we overflow on ludicrously large mappings Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/proc/proc_misc.c') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 74a323d2b850..32dc14cd8900 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -139,7 +139,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, #define K(x) ((x) << (PAGE_SHIFT - 10)) si_meminfo(&i); si_swapinfo(&i); - committed = atomic_read(&vm_committed_space); + committed = atomic_long_read(&vm_committed_space); allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; -- cgit v1.2.3 From a2eddfa95919a730e0e5ed17e9c303fe5ba249cd Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: x86: make /proc/stat account for all interrupts LAPIC interrupts, which don't go through the generic interrupt handling code, aren't accounted for in /proc/stat. Hence this patch adds a mechanism architectures can use to accordingly adjust the statistics. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq_32.c | 38 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/irq_64.c | 28 ++++++++++++++++++++++++++++ fs/proc/proc_misc.c | 9 +++++++++ include/asm-x86/hardirq.h | 6 ++++++ 4 files changed, 81 insertions(+) (limited to 'fs/proc/proc_misc.c') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 147352df28b9..468acd04aa2e 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -313,16 +313,20 @@ skip: per_cpu(irq_stat,j).irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); +#endif +#ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); +#endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); @@ -331,6 +335,40 @@ skip: return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = nmi_count(cpu); + +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).apic_timer_irqs; +#endif +#ifdef CONFIG_SMP + sum += per_cpu(irq_stat, cpu).irq_resched_count; + sum += per_cpu(irq_stat, cpu).irq_call_count; + sum += per_cpu(irq_stat, cpu).irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += per_cpu(irq_stat, cpu).irq_thermal_count; +#endif +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).irq_spurious_count; +#endif + return sum; +} + +u64 arch_irq_stat(void) +{ + u64 sum = atomic_read(&irq_err_count); + +#ifdef CONFIG_X86_IO_APIC + sum += atomic_read(&irq_mis_count); +#endif + return sum; +} + #ifdef CONFIG_HOTPLUG_CPU #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 3aac15466a91..1f78b238d8d2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -135,6 +135,7 @@ skip: seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); @@ -143,6 +144,7 @@ skip: for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); +#endif seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); @@ -152,6 +154,32 @@ skip: return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = cpu_pda(cpu)->__nmi_count; + + sum += cpu_pda(cpu)->apic_timer_irqs; +#ifdef CONFIG_SMP + sum += cpu_pda(cpu)->irq_resched_count; + sum += cpu_pda(cpu)->irq_call_count; + sum += cpu_pda(cpu)->irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += cpu_pda(cpu)->irq_thermal_count; + sum += cpu_pda(cpu)->irq_threshold_count; +#endif + sum += cpu_pda(cpu)->irq_spurious_count; + return sum; +} + +u64 arch_irq_stat(void) +{ + return atomic_read(&irq_err_count); +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 74a323d2b850..903e617bec58 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -472,6 +472,13 @@ static const struct file_operations proc_vmalloc_operations = { }; #endif +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif +#ifndef arch_irq_stat +#define arch_irq_stat() 0 +#endif + static int show_stat(struct seq_file *p, void *v) { int i; @@ -509,7 +516,9 @@ static int show_stat(struct seq_file *p, void *v) sum += temp; per_irq_sum[j] += temp; } + sum += arch_irq_stat_cpu(i); } + sum += arch_irq_stat(); seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)cputime64_to_clock_t(user), diff --git a/include/asm-x86/hardirq.h b/include/asm-x86/hardirq.h index 314434d664e7..000787df66e6 100644 --- a/include/asm-x86/hardirq.h +++ b/include/asm-x86/hardirq.h @@ -3,3 +3,9 @@ #else # include "hardirq_64.h" #endif + +extern u64 arch_irq_stat_cpu(unsigned int cpu); +#define arch_irq_stat_cpu arch_irq_stat_cpu + +extern u64 arch_irq_stat(void); +#define arch_irq_stat arch_irq_stat -- cgit v1.2.3 From bbcdac0c20aa20d1daad41d9c138102b70e5aae4 Mon Sep 17 00:00:00 2001 From: Thomas Tuttle Date: Thu, 5 Jun 2008 22:46:58 -0700 Subject: pagemap: return map count, not reference count, in /proc/kpagecount Since pagemap is all about examining pages mapped into processes' memory spaces, it makes sense for kpagecount to return the map counts, not the reference counts. Signed-off-by: Thomas Tuttle Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/proc/proc_misc.c') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 32dc14cd8900..5a16090a6d6e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -726,7 +726,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, if (!ppage) pcount = 0; else - pcount = atomic_read(&ppage->_count); + pcount = page_mapcount(ppage); if (put_user(pcount, out++)) { ret = -EFAULT; -- cgit v1.2.3 From 4710d1ac4c491dd8a28f57946214c0b5fe73cc87 Mon Sep 17 00:00:00 2001 From: Thomas Tuttle Date: Thu, 5 Jun 2008 22:46:58 -0700 Subject: pagemap: return EINVAL, not EIO, for unaligned reads of kpagecount or kpageflags If the user tries to read from a position that is not a multiple of 8, or read a number of bytes that is not a multiple of 8, they have passed an invalid argument to read, for the purpose of reading these files. It's not an IO error because we didn't encounter any trouble finding the data they asked for. Signed-off-by: Thomas Tuttle Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/proc/proc_misc.c') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5a16090a6d6e..7e277f2ad466 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -716,7 +716,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, pfn = src / KPMSIZE; count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) - return -EIO; + return -EINVAL; while (count > 0) { ppage = NULL; @@ -782,7 +782,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, pfn = src / KPMSIZE; count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) - return -EIO; + return -EINVAL; while (count > 0) { ppage = NULL; -- cgit v1.2.3 From ce0c0e50f94e8c55b00a722e8c6e8d6c802be211 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 2 May 2008 11:46:49 +0200 Subject: x86, generic: CPA add statistics about state of direct mapping v4 Add information about the mapping state of the direct mapping to /proc/meminfo. I chose /proc/meminfo because that is where all the other memory statistics are too and it is a generally useful metric even outside debugging situations. A lot of split kernel pages means the kernel will run slower. This way we can see how many large pages are really used for it and how many are split. Useful for general insight into the kernel. v2: Add hotplug locking to 64bit to plug a very obscure theoretical race. 32bit doesn't need it because it doesn't support hotadd for lowmem. Fix some typos v3: Rename dpages_cnt Add CONFIG ifdef for count update as requested by tglx Expand description v4: Fix stupid bugs added in v3 Move update_page_count to pageattr.c Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 5 +++++ arch/x86/mm/init_64.c | 7 +++++++ arch/x86/mm/pageattr.c | 35 +++++++++++++++++++++++++++++++++++ fs/proc/proc_misc.c | 7 +++++++ include/asm-x86/pgtable.h | 3 +++ 5 files changed, 57 insertions(+) (limited to 'fs/proc/proc_misc.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b6..0269ac230bfa 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -162,6 +162,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pgd_t *pgd; pmd_t *pmd; pte_t *pte; + unsigned pages_2m = 0, pages_4k = 0; pgd_idx = pgd_index(PAGE_OFFSET); pgd = pgd_base + pgd_idx; @@ -197,6 +198,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) is_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; + pages_2m++; set_pmd(pmd, pfn_pmd(pfn, prot)); pfn += PTRS_PER_PTE; @@ -213,11 +215,14 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) if (is_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; + pages_4k++; set_pte(pte, pfn_pte(pfn, prot)); } max_pfn_mapped = pfn; } } + update_page_count(PG_LEVEL_2M, pages_2m); + update_page_count(PG_LEVEL_4K, pages_4k); } static inline int page_kills_ppro(unsigned long pagenr) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 819dad973b13..5e4383859053 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -312,6 +312,8 @@ __meminit void early_iounmap(void *addr, unsigned long size) static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) { + unsigned long pages = 0; + int i = pmd_index(address); for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { @@ -328,9 +330,11 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) if (pmd_val(*pmd)) continue; + pages++; set_pte((pte_t *)pmd, pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); } + update_page_count(PG_LEVEL_2M, pages); return address; } @@ -350,6 +354,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) static unsigned long __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) { + unsigned long pages = 0; unsigned long last_map_addr = end; int i = pud_index(addr); @@ -374,6 +379,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) } if (direct_gbpages) { + pages++; set_pte((pte_t *)pud, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); last_map_addr = (addr & PUD_MASK) + PUD_SIZE; @@ -390,6 +396,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) unmap_low_page(pmd); } __flush_tlb_all(); + update_page_count(PG_LEVEL_1G, pages); return last_map_addr >> PAGE_SHIFT; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 60bcb5b6a37e..668205bca15e 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -34,6 +34,19 @@ struct cpa_data { unsigned force_split : 1; }; +static unsigned long direct_pages_count[PG_LEVEL_NUM]; + +void __meminit update_page_count(int level, unsigned long pages) +{ +#ifdef CONFIG_PROC_FS + unsigned long flags; + /* Protect against CPA */ + spin_lock_irqsave(&pgd_lock, flags); + direct_pages_count[level] += pages; + spin_unlock_irqrestore(&pgd_lock, flags); +#endif +} + #ifdef CONFIG_X86_64 static inline unsigned long highmap_start_pfn(void) @@ -500,6 +513,12 @@ static int split_large_page(pte_t *kpte, unsigned long address) for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); + if (address >= (unsigned long)__va(0) && + address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) { + direct_pages_count[level]--; + direct_pages_count[level - 1] += PTRS_PER_PTE; + } + /* * Install the new, split up pagetable. Important details here: * @@ -1029,6 +1048,22 @@ bool kernel_page_present(struct page *page) #endif /* CONFIG_DEBUG_PAGEALLOC */ +#ifdef CONFIG_PROC_FS +int arch_report_meminfo(char *page) +{ + int n; + n = sprintf(page, "DirectMap4k: %8lu\n" + "DirectMap2M: %8lu\n", + direct_pages_count[PG_LEVEL_4K], + direct_pages_count[PG_LEVEL_2M]); +#ifdef CONFIG_X86_64 + n += sprintf(page + n, "DirectMap1G: %8lu\n", + direct_pages_count[PG_LEVEL_1G]); +#endif + return n; +} +#endif + /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 7e277f2ad466..15f7bd41029b 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -123,6 +123,11 @@ static int uptime_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } +int __attribute__((weak)) arch_report_meminfo(char *page) +{ + return 0; +} + static int meminfo_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -221,6 +226,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, len += hugetlb_report_meminfo(page + len); + len += arch_report_meminfo(page + len); + return proc_calc_metrics(page, start, off, count, eof, len); #undef K } diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 97c271b2910b..111564fa5e70 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -369,8 +369,11 @@ enum { PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G, + PG_LEVEL_NUM }; +void update_page_count(int level, unsigned long pages); + /* * Helper function that returns the kernel pagetable entry controlling * the virtual address 'address'. NULL means no pagetable entry present. -- cgit v1.2.3