From ba6a328f7dfc95b20df5e0eb33c698187e997190 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 26 Jan 2014 13:01:42 -0800 Subject: x86/mm: Avoid duplicated pxm_to_node() calls In slit init code, too many pxm_to_node() function calls are done. We can store from_node/to_node instead of keep calling pxm_to_node(). - Before this patch: pxm_to_node() is called n*(1+n*3) times. - After this patch: pxm_to_node() is called n*(1+n) times. for 8 sockets, it will be 72 instead of 200. for 32 sockets, it will be 1056 instead of 3104. Signed-off-by: Yinghai Lu Cc: Toshi Kani Cc: David Rientjes Link: http://lkml.kernel.org/r/1390770102-4007-1-git-send-email-yinghai@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/srat.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 1953e9c9391a..66338a60aa6e 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -52,12 +52,18 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) int i, j; for (i = 0; i < slit->locality_count; i++) { - if (pxm_to_node(i) == NUMA_NO_NODE) + const int from_node = pxm_to_node(i); + + if (from_node == NUMA_NO_NODE) continue; + for (j = 0; j < slit->locality_count; j++) { - if (pxm_to_node(j) == NUMA_NO_NODE) + const int to_node = pxm_to_node(j); + + if (to_node == NUMA_NO_NODE) continue; - numa_set_distance(pxm_to_node(i), pxm_to_node(j), + + numa_set_distance(from_node, to_node, slit->entry[slit->locality_count * i + j]); } } -- cgit v1.2.3 From b5660ba76b41af69a0c09d434927bb4b4cadd4b1 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 25 Feb 2014 12:14:06 -0800 Subject: x86, platforms: Remove NUMAQ The NUMAQ support seems to be unmaintained, remove it. Cc: Paul Gortmaker Cc: David Rientjes Acked-by: Paul E. McKenney Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/n/530CFD6C.7040705@zytor.com --- arch/x86/Kconfig | 36 +-- arch/x86/Kconfig.cpu | 2 +- arch/x86/include/asm/mmzone_32.h | 3 - arch/x86/include/asm/mpspec.h | 6 - arch/x86/include/asm/numaq.h | 171 ------------- arch/x86/kernel/apic/Makefile | 1 - arch/x86/kernel/apic/numaq_32.c | 524 --------------------------------------- arch/x86/kernel/cpu/intel.c | 4 - arch/x86/mm/numa.c | 4 - arch/x86/pci/Makefile | 1 - arch/x86/pci/numaq_32.c | 165 ------------ 11 files changed, 9 insertions(+), 908 deletions(-) delete mode 100644 arch/x86/include/asm/numaq.h delete mode 100644 arch/x86/kernel/apic/numaq_32.c delete mode 100644 arch/x86/pci/numaq_32.c (limited to 'arch/x86/mm') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3c7f6dbf8498..e1d0c9ac2b56 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -346,7 +346,6 @@ config X86_EXTENDED_PLATFORM for the following (non-PC) 32 bit x86 platforms: Goldfish (Android emulator) AMD Elan - NUMAQ (IBM/Sequent) RDC R-321x SoC SGI 320/540 (Visual Workstation) STA2X11-based (e.g. Northville) @@ -487,32 +486,18 @@ config X86_32_NON_STANDARD depends on X86_32 && SMP depends on X86_EXTENDED_PLATFORM ---help--- - This option compiles in the NUMAQ, bigsmp, and STA2X11 default - subarchitectures. It is intended for a generic binary kernel. If you - select them all, kernel will probe it one by one and will fallback to - default. + This option compiles in the bigsmp and STA2X11 default + subarchitectures. It is intended for a generic binary + kernel. If you select them all, kernel will probe it one by + one and will fallback to default. # Alphabetically sorted list of Non standard 32 bit platforms -config X86_NUMAQ - bool "NUMAQ (IBM/Sequent)" - depends on X86_32_NON_STANDARD - depends on PCI - select NUMA - select X86_MPPARSE - ---help--- - This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) - NUMA multiquad box. This changes the way that processors are - bootstrapped, and uses Clustered Logical APIC addressing mode instead - of Flat Logical. You will need a new lynxer.elf file to flash your - firmware with - send email to . - config X86_SUPPORTS_MEMORY_FAILURE def_bool y # MCE code calls memory_failure(): depends on X86_MCE # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: - depends on !X86_NUMAQ # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: depends on X86_64 || !SPARSEMEM select ARCH_SUPPORTS_MEMORY_FAILURE @@ -783,7 +768,7 @@ config NR_CPUS range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 default "1" if !SMP default "8192" if MAXSMP - default "32" if SMP && (X86_NUMAQ || X86_BIGSMP) + default "32" if SMP && X86_BIGSMP default "8" if SMP ---help--- This allows you to specify the maximum number of CPUs which this @@ -1064,13 +1049,11 @@ config X86_CPUID choice prompt "High Memory Support" - default HIGHMEM64G if X86_NUMAQ default HIGHMEM4G depends on X86_32 config NOHIGHMEM bool "off" - depends on !X86_NUMAQ ---help--- Linux can use up to 64 Gigabytes of physical memory on x86 systems. However, the address space of 32-bit x86 processors is only 4 @@ -1107,7 +1090,6 @@ config NOHIGHMEM config HIGHMEM4G bool "4GB" - depends on !X86_NUMAQ ---help--- Select this if you have a 32-bit processor and between 1 and 4 gigabytes of physical RAM. @@ -1199,8 +1181,8 @@ config DIRECT_GBPAGES config NUMA bool "Numa Memory Allocation and Scheduler Support" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP)) - default y if (X86_NUMAQ || X86_BIGSMP) + depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP) + default y if X86_BIGSMP ---help--- Enable NUMA (Non Uniform Memory Access) support. @@ -1211,8 +1193,7 @@ config NUMA For 64-bit this is recommended if the system is Intel Core i7 (or later), AMD Opteron, or EM64T NUMA. - For 32-bit this is only needed on (rare) 32-bit-only platforms - that support NUMA topologies, such as NUMAQ, or if you boot a 32-bit + For 32-bit this is only needed if you boot a 32-bit kernel on a 64-bit NUMA platform. Otherwise, you should say N. @@ -1258,7 +1239,6 @@ config NODES_SHIFT range 1 10 default "10" if MAXSMP default "6" if X86_64 - default "4" if X86_NUMAQ default "3" depends on NEED_MULTIPLE_NODES ---help--- diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index c026cca5602c..3c6b17b08a18 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -363,7 +363,7 @@ config X86_P6_NOP config X86_TSC def_bool y - depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 config X86_CMPXCHG64 def_bool y diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 8a9b3e288cb4..1ec990bd7dc0 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -11,9 +11,6 @@ #ifdef CONFIG_NUMA extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) - -#include - #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 3e6b4920ef5d..f5a617956735 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -25,12 +25,6 @@ extern int pic_mode; extern unsigned int def_to_bigsmp; -#ifdef CONFIG_X86_NUMAQ -extern int mp_bus_id_to_node[MAX_MP_BUSSES]; -extern int mp_bus_id_to_local[MAX_MP_BUSSES]; -extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; -#endif - #else /* CONFIG_X86_64: */ #define MAX_MP_BUSSES 256 diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h deleted file mode 100644 index c3b3c322fd87..000000000000 --- a/arch/x86/include/asm/numaq.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Written by: Patricia Gaughen, IBM Corporation - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to - */ - -#ifndef _ASM_X86_NUMAQ_H -#define _ASM_X86_NUMAQ_H - -#ifdef CONFIG_X86_NUMAQ - -extern int found_numaq; -extern int numaq_numa_init(void); -extern int pci_numaq_init(void); - -extern void *xquad_portio; - -#define XQUAD_PORTIO_BASE 0xfe400000 -#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ -#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) - -/* - * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the - */ -#define SYS_CFG_DATA_PRIV_ADDR 0x0009d000 /* place for scd in private - quad space */ - -/* - * Communication area for each processor on lynxer-processor tests. - * - * NOTE: If you change the size of this eachproc structure you need - * to change the definition for EACH_QUAD_SIZE. - */ -struct eachquadmem { - unsigned int priv_mem_start; /* Starting address of this */ - /* quad's private memory. */ - /* This is always 0. */ - /* In MB. */ - unsigned int priv_mem_size; /* Size of this quad's */ - /* private memory. */ - /* In MB. */ - unsigned int low_shrd_mem_strp_start;/* Starting address of this */ - /* quad's low shared block */ - /* (untranslated). */ - /* In MB. */ - unsigned int low_shrd_mem_start; /* Starting address of this */ - /* quad's low shared memory */ - /* (untranslated). */ - /* In MB. */ - unsigned int low_shrd_mem_size; /* Size of this quad's low */ - /* shared memory. */ - /* In MB. */ - unsigned int lmmio_copb_start; /* Starting address of this */ - /* quad's local memory */ - /* mapped I/O in the */ - /* compatibility OPB. */ - /* In MB. */ - unsigned int lmmio_copb_size; /* Size of this quad's local */ - /* memory mapped I/O in the */ - /* compatibility OPB. */ - /* In MB. */ - unsigned int lmmio_nopb_start; /* Starting address of this */ - /* quad's local memory */ - /* mapped I/O in the */ - /* non-compatibility OPB. */ - /* In MB. */ - unsigned int lmmio_nopb_size; /* Size of this quad's local */ - /* memory mapped I/O in the */ - /* non-compatibility OPB. */ - /* In MB. */ - unsigned int io_apic_0_start; /* Starting address of I/O */ - /* APIC 0. */ - unsigned int io_apic_0_sz; /* Size I/O APIC 0. */ - unsigned int io_apic_1_start; /* Starting address of I/O */ - /* APIC 1. */ - unsigned int io_apic_1_sz; /* Size I/O APIC 1. */ - unsigned int hi_shrd_mem_start; /* Starting address of this */ - /* quad's high shared memory.*/ - /* In MB. */ - unsigned int hi_shrd_mem_size; /* Size of this quad's high */ - /* shared memory. */ - /* In MB. */ - unsigned int mps_table_addr; /* Address of this quad's */ - /* MPS tables from BIOS, */ - /* in system space.*/ - unsigned int lcl_MDC_pio_addr; /* Port-I/O address for */ - /* local access of MDC. */ - unsigned int rmt_MDC_mmpio_addr; /* MM-Port-I/O address for */ - /* remote access of MDC. */ - unsigned int mm_port_io_start; /* Starting address of this */ - /* quad's memory mapped Port */ - /* I/O space. */ - unsigned int mm_port_io_size; /* Size of this quad's memory*/ - /* mapped Port I/O space. */ - unsigned int mm_rmt_io_apic_start; /* Starting address of this */ - /* quad's memory mapped */ - /* remote I/O APIC space. */ - unsigned int mm_rmt_io_apic_size; /* Size of this quad's memory*/ - /* mapped remote I/O APIC */ - /* space. */ - unsigned int mm_isa_start; /* Starting address of this */ - /* quad's memory mapped ISA */ - /* space (contains MDC */ - /* memory space). */ - unsigned int mm_isa_size; /* Size of this quad's memory*/ - /* mapped ISA space (contains*/ - /* MDC memory space). */ - unsigned int rmt_qmi_addr; /* Remote addr to access QMI.*/ - unsigned int lcl_qmi_addr; /* Local addr to access QMI. */ -}; - -/* - * Note: This structure must be NOT be changed unless the multiproc and - * OS are changed to reflect the new structure. - */ -struct sys_cfg_data { - unsigned int quad_id; - unsigned int bsp_proc_id; /* Boot Strap Processor in this quad. */ - unsigned int scd_version; /* Version number of this table. */ - unsigned int first_quad_id; - unsigned int quads_present31_0; /* 1 bit for each quad */ - unsigned int quads_present63_32; /* 1 bit for each quad */ - unsigned int config_flags; - unsigned int boot_flags; - unsigned int csr_start_addr; /* Absolute value (not in MB) */ - unsigned int csr_size; /* Absolute value (not in MB) */ - unsigned int lcl_apic_start_addr; /* Absolute value (not in MB) */ - unsigned int lcl_apic_size; /* Absolute value (not in MB) */ - unsigned int low_shrd_mem_base; /* 0 or 512MB or 1GB */ - unsigned int low_shrd_mem_quad_offset; /* 0,128M,256M,512M,1G */ - /* may not be totally populated */ - unsigned int split_mem_enbl; /* 0 for no low shared memory */ - unsigned int mmio_sz; /* Size of total system memory mapped I/O */ - /* (in MB). */ - unsigned int quad_spin_lock; /* Spare location used for quad */ - /* bringup. */ - unsigned int nonzero55; /* For checksumming. */ - unsigned int nonzeroaa; /* For checksumming. */ - unsigned int scd_magic_number; - unsigned int system_type; - unsigned int checksum; - /* - * memory configuration area for each quad - */ - struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */ -}; - -void numaq_tsc_disable(void); - -#endif /* CONFIG_X86_NUMAQ */ -#endif /* _ASM_X86_NUMAQ_H */ - diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 70e141520ad7..dcb5b15401ce 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -18,7 +18,6 @@ obj-y += apic_flat_64.o endif # APIC probe will depend on the listing order here -obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o # For 32bit, probe_32 need to be listed last diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c deleted file mode 100644 index 030ea1c04f72..000000000000 --- a/arch/x86/kernel/apic/numaq_32.c +++ /dev/null @@ -1,524 +0,0 @@ -/* - * Written by: Patricia Gaughen, IBM Corporation - * - * Copyright (C) 2002, IBM Corp. - * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -int found_numaq; - -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... - */ -struct mpc_trans { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - -static int mpc_record; - -static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; - -int mp_bus_id_to_node[MAX_MP_BUSSES]; -int mp_bus_id_to_local[MAX_MP_BUSSES]; -int quad_local_to_mp_bus_id[NR_CPUS/4][4]; - - -static inline void numaq_register_node(int node, struct sys_cfg_data *scd) -{ - struct eachquadmem *eq = scd->eq + node; - u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20; - u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20; - int ret; - - node_set(node, numa_nodes_parsed); - ret = numa_add_memblk(node, start, end); - BUG_ON(ret < 0); -} - -/* - * Function: smp_dump_qct() - * - * Description: gets memory layout from the quad config table. This - * function also updates numa_nodes_parsed with the nodes (quads) present. - */ -static void __init smp_dump_qct(void) -{ - struct sys_cfg_data *scd; - int node; - - scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); - - for_each_node(node) { - if (scd->quads_present31_0 & (1 << node)) - numaq_register_node(node, scd); - } -} - -void numaq_tsc_disable(void) -{ - if (!found_numaq) - return; - - if (num_online_nodes() > 1) { - printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); - setup_clear_cpu_cap(X86_FEATURE_TSC); - } -} - -static void __init numaq_tsc_init(void) -{ - numaq_tsc_disable(); -} - -static inline int generate_logical_apicid(int quad, int phys_apicid) -{ - return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); -} - -/* x86_quirks member */ -static int mpc_apic_id(struct mpc_cpu *m) -{ - int quad = translation_table[mpc_record]->trans_quad; - int logical_apicid = generate_logical_apicid(quad, m->apicid); - - printk(KERN_DEBUG - "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", - m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, - (m->cpufeature & CPU_MODEL_MASK) >> 4, - m->apicver, quad, logical_apicid); - - return logical_apicid; -} - -/* x86_quirks member */ -static void mpc_oem_bus_info(struct mpc_bus *m, char *name) -{ - int quad = translation_table[mpc_record]->trans_quad; - int local = translation_table[mpc_record]->trans_local; - - mp_bus_id_to_node[m->busid] = quad; - mp_bus_id_to_local[m->busid] = local; - - printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad); -} - -/* x86_quirks member */ -static void mpc_oem_pci_bus(struct mpc_bus *m) -{ - int quad = translation_table[mpc_record]->trans_quad; - int local = translation_table[mpc_record]->trans_local; - - quad_local_to_mp_bus_id[quad][local] = m->busid; -} - -/* - * Called from mpparse code. - * mode = 0: prescan - * mode = 1: one mpc entry scanned - */ -static void numaq_mpc_record(unsigned int mode) -{ - if (!mode) - mpc_record = 0; - else - mpc_record++; -} - -static void __init MP_translation_info(struct mpc_trans *m) -{ - printk(KERN_INFO - "Translation: record %d, type %d, quad %d, global %d, local %d\n", - mpc_record, m->trans_type, m->trans_quad, m->trans_global, - m->trans_local); - - if (mpc_record >= MAX_MPC_ENTRY) - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); - else - translation_table[mpc_record] = m; /* stash this for later */ - - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) - node_set_online(m->trans_quad); -} - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum = 0; - - while (len--) - sum += *mp++; - - return sum & 0xFF; -} - -/* - * Read/parse the MPC oem tables - */ -static void __init smp_read_mpc_oem(struct mpc_table *mpc) -{ - struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr; - int count = sizeof(*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable) + count; - - mpc_record = 0; - printk(KERN_INFO - "Found an OEM MPC table at %8p - parsing it...\n", oemtable); - - if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { - printk(KERN_WARNING - "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->signature[0], oemtable->signature[1], - oemtable->signature[2], oemtable->signature[3]); - return; - } - - if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { - printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); - return; - } - - while (count < oemtable->length) { - switch (*oemptr) { - case MP_TRANSLATION: - { - struct mpc_trans *m = (void *)oemptr; - - MP_translation_info(m); - oemptr += sizeof(*m); - count += sizeof(*m); - ++mpc_record; - break; - } - default: - printk(KERN_WARNING - "Unrecognised OEM table entry type! - %d\n", - (int)*oemptr); - return; - } - } -} - -static __init void early_check_numaq(void) -{ - /* - * get boot-time SMP configuration: - */ - if (smp_found_config) - early_get_smp_config(); - - if (found_numaq) { - x86_init.mpparse.mpc_record = numaq_mpc_record; - x86_init.mpparse.setup_ioapic_ids = x86_init_noop; - x86_init.mpparse.mpc_apic_id = mpc_apic_id; - x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; - x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; - x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; - x86_init.timers.tsc_pre_init = numaq_tsc_init; - x86_init.pci.init = pci_numaq_init; - } -} - -int __init numaq_numa_init(void) -{ - early_check_numaq(); - if (!found_numaq) - return -ENOENT; - smp_dump_qct(); - - return 0; -} - -#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline unsigned int numaq_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0x0F; -} - -static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_logical(mask, vector); -} - -static inline void numaq_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); -} - -static inline void numaq_send_IPI_all(int vector) -{ - numaq_send_IPI_mask(cpu_online_mask, vector); -} - -#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) -#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) - -/* - * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it: - */ -static inline void numaq_smp_callin_clear_local_apic(void) -{ - clear_local_APIC(); -} - -static inline const struct cpumask *numaq_target_cpus(void) -{ - return cpu_all_mask; -} - -static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -static inline unsigned long numaq_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static inline int numaq_apic_id_registered(void) -{ - return 1; -} - -static inline void numaq_init_apic_ldr(void) -{ - /* Already done in NUMA-Q firmware */ -} - -static inline void numaq_setup_apic_routing(void) -{ - printk(KERN_INFO - "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n", - nr_ioapics); -} - -/* - * Skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum. - */ -static inline int numaq_multi_timer_check(int apic, int irq) -{ - return apic != 0 && irq == 0; -} - -static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL, retmap); -} - -/* - * Supporting over 60 cpus on NUMA-Q requires a locality-dependent - * cpu to APIC ID relation to properly interact with the intelligent - * mode of the cluster controller. - */ -static inline int numaq_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < 60) - return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); - else - return BAD_APICID; -} - -static inline int numaq_apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 4; -} - -static int numaq_numa_cpu_node(int cpu) -{ - int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - - if (logical_apicid != BAD_APICID) - return numaq_apicid_to_node(logical_apicid); - return NUMA_NO_NODE; -} - -static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) -{ - int node = numaq_apicid_to_node(logical_apicid); - int cpu = __ffs(logical_apicid & 0xf); - - physid_set_mask_of_physid(cpu + 4*node, retmap); -} - -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio; - -static inline int numaq_check_phys_apicid_present(int phys_apicid) -{ - return 1; -} - -/* - * We use physical apicids here, not logical, so just return the default - * physical broadcast to stop people from breaking us - */ -static int -numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid) -{ - *apicid = 0x0F; - return 0; -} - -/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static int -numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - if (strncmp(oem, "IBM NUMA", 8)) - printk(KERN_ERR "Warning! Not a NUMA-Q system!\n"); - else - found_numaq = 1; - - return found_numaq; -} - -static int probe_numaq(void) -{ - /* already know from get_memcfg_numaq() */ - return found_numaq; -} - -static void numaq_setup_portio_remap(void) -{ - int num_quads = num_online_nodes(); - - if (num_quads <= 1) - return; - - printk(KERN_INFO - "Remapping cross-quad port I/O for %d quads\n", num_quads); - - xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - - printk(KERN_INFO - "xquad_portio vaddr 0x%08lx, len %08lx\n", - (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); -} - -/* Use __refdata to keep false positive warning calm. */ -static struct apic __refdata apic_numaq = { - - .name = "NUMAQ", - .probe = probe_numaq, - .acpi_madt_oem_check = NULL, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = numaq_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* physical delivery on LOCAL quad: */ - .irq_dest_mode = 0, - - .target_cpus = numaq_target_cpus, - .disable_esr = 1, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = numaq_check_apicid_used, - .check_apicid_present = numaq_check_apicid_present, - - .vector_allocation_domain = flat_vector_allocation_domain, - .init_apic_ldr = numaq_init_apic_ldr, - - .ioapic_phys_id_map = numaq_ioapic_phys_id_map, - .setup_apic_routing = numaq_setup_apic_routing, - .multi_timer_check = numaq_multi_timer_check, - .cpu_present_to_apicid = numaq_cpu_present_to_apicid, - .apicid_to_cpu_present = numaq_apicid_to_cpu_present, - .setup_portio_remap = numaq_setup_portio_remap, - .check_phys_apicid_present = numaq_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = numaq_phys_pkg_id, - .mps_oem_check = numaq_mps_oem_check, - - .get_apic_id = numaq_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, - - .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, - - .send_IPI_mask = numaq_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = numaq_send_IPI_allbutself, - .send_IPI_all = numaq_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi, - .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, - - /* We don't do anything here because we use NMI's to boot instead */ - .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, - .inquire_remote_apic = NULL, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .eoi_write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, - - .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, - .x86_32_numa_cpu_node = numaq_numa_cpu_node, -}; - -apic_driver(apic_numaq); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5cd9bfabd645..ea56e7c9abe6 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -267,10 +267,6 @@ static void intel_workarounds(struct cpuinfo_x86 *c) } #endif -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif - intel_smp_check(c); } #else diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 27aa0455fab3..1d045f9c390f 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -687,10 +687,6 @@ static int __init dummy_numa_init(void) void __init x86_numa_init(void) { if (!numa_off) { -#ifdef CONFIG_X86_NUMAQ - if (!numa_init(numaq_numa_init)) - return; -#endif #ifdef CONFIG_ACPI_NUMA if (!numa_init(x86_acpi_numa_init)) return; diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 758b8272c821..5c6fc3577a49 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -13,7 +13,6 @@ obj-y += legacy.o irq.o obj-$(CONFIG_STA2X11) += sta2x11-fixup.o -obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_NUMACHIP) += numachip.o obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c deleted file mode 100644 index 72c229f9ebcf..000000000000 --- a/arch/x86/pci/numaq_32.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * numaq_32.c - Low-level PCI access for NUMA-Q machines - */ - -#include -#include -#include -#include -#include -#include -#include - -#define BUS2QUAD(global) (mp_bus_id_to_node[global]) - -#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) - -#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) - -#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) - -static void write_cf8(unsigned bus, unsigned devfn, unsigned reg) -{ - unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg); - if (xquad_portio) - writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus))); - else - outl(val, 0xCF8); -} - -static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) -{ - unsigned long flags; - void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); - - WARN_ON(seg); - if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - raw_spin_lock_irqsave(&pci_config_lock, flags); - - write_cf8(bus, devfn, reg); - - switch (len) { - case 1: - if (xquad_portio) - *value = readb(adr + (reg & 3)); - else - *value = inb(0xCFC + (reg & 3)); - break; - case 2: - if (xquad_portio) - *value = readw(adr + (reg & 2)); - else - *value = inw(0xCFC + (reg & 2)); - break; - case 4: - if (xquad_portio) - *value = readl(adr); - else - *value = inl(0xCFC); - break; - } - - raw_spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value) -{ - unsigned long flags; - void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); - - WARN_ON(seg); - if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - raw_spin_lock_irqsave(&pci_config_lock, flags); - - write_cf8(bus, devfn, reg); - - switch (len) { - case 1: - if (xquad_portio) - writeb(value, adr + (reg & 3)); - else - outb((u8)value, 0xCFC + (reg & 3)); - break; - case 2: - if (xquad_portio) - writew(value, adr + (reg & 2)); - else - outw((u16)value, 0xCFC + (reg & 2)); - break; - case 4: - if (xquad_portio) - writel(value, adr + reg); - else - outl((u32)value, 0xCFC); - break; - } - - raw_spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -#undef PCI_CONF1_MQ_ADDRESS - -static const struct pci_raw_ops pci_direct_conf1_mq = { - .read = pci_conf1_mq_read, - .write = pci_conf1_mq_write -}; - - -static void pci_fixup_i450nx(struct pci_dev *d) -{ - /* - * i450NX -- Find and scan all secondary buses on all PXB's. - */ - int pxb, reg; - u8 busno, suba, subb; - int quad = BUS2QUAD(d->bus->number); - - dev_info(&d->dev, "searching for i450NX host bridges\n"); - reg = 0xd0; - for(pxb=0; pxb<2; pxb++) { - pci_read_config_byte(d, reg++, &busno); - pci_read_config_byte(d, reg++, &suba); - pci_read_config_byte(d, reg++, &subb); - dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", - pxb, busno, suba, subb); - if (busno) { - /* Bus A */ - pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); - } - if (suba < subb) { - /* Bus B */ - pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1)); - } - } - pcibios_last_bus = -1; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); - -int __init pci_numaq_init(void) -{ - int quad; - - raw_pci_ops = &pci_direct_conf1_mq; - - pcibios_scan_root(0); - if (num_online_nodes() > 1) - for_each_online_node(quad) { - if (quad == 0) - continue; - printk("Scanning PCI bus %d for quad %d\n", - QUADLOCAL2BUS(quad,0), quad); - pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0)); - } - return 0; -} -- cgit v1.2.3 From 8b80fd8b45458a7d0b726d454c51c5219e0fb2ee Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 26 Feb 2014 12:06:50 -0700 Subject: x86: Use clflushopt in clflush_cache_range If clflushopt is available on the system, use it instead of clflush in clflush_cache_range. Signed-off-by: Ross Zwisler Link: http://lkml.kernel.org/r/1393441612-19729-3-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index b3b19f46c016..b1c46723b19b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -126,8 +126,8 @@ within(unsigned long addr, unsigned long start, unsigned long end) * @vaddr: virtual start address * @size: number of bytes to flush * - * clflush is an unordered instruction which needs fencing with mfence - * to avoid ordering issues. + * clflushopt is an unordered instruction which needs fencing with mfence or + * sfence to avoid ordering issues. */ void clflush_cache_range(void *vaddr, unsigned int size) { @@ -136,11 +136,11 @@ void clflush_cache_range(void *vaddr, unsigned int size) mb(); for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) - clflush(vaddr); + clflushopt(vaddr); /* * Flush any possible final partial cacheline: */ - clflush(vend); + clflushopt(vend); mb(); } -- cgit v1.2.3 From ef6bea6ddf0e76077d0798e57b374015b23a837e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Jan 2014 12:48:14 +0100 Subject: x86, ptdump: Add the functionality to dump an arbitrary pagetable With reusing the ->trampoline_pgd page table for mapping EFI regions in order to use them after having switched to EFI virtual mode, it is very useful to be able to dump aforementioned page table in dmesg. This adds that functionality through the walk_pgd_level() interface which can be called from somewhere else. The original functionality of dumping to debugfs remains untouched. Cc: Arjan van de Ven Signed-off-by: Borislav Petkov Tested-by: Toshi Kani Signed-off-by: Matt Fleming --- arch/x86/include/asm/pgtable.h | 3 +- arch/x86/mm/dump_pagetables.c | 84 +++++++++++++++++++++++++++--------------- 2 files changed, 56 insertions(+), 31 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5ad38ad07890..938ef1d0458e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,9 +15,10 @@ : (prot)) #ifndef __ASSEMBLY__ - #include +void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0002a3a33081..20621d753d5f 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -30,6 +30,7 @@ struct pg_state { unsigned long start_address; unsigned long current_address; const struct addr_marker *marker; + bool to_dmesg; }; struct addr_marker { @@ -88,10 +89,28 @@ static struct addr_marker address_markers[] = { #define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT) #define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT) +#define pt_dump_seq_printf(m, to_dmesg, fmt, args...) \ +({ \ + if (to_dmesg) \ + printk(KERN_INFO fmt, ##args); \ + else \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_cont_printf(m, to_dmesg, fmt, args...) \ +({ \ + if (to_dmesg) \ + printk(KERN_CONT fmt, ##args); \ + else \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + /* * Print a readable form of a pgprot_t to the seq_file */ -static void printk_prot(struct seq_file *m, pgprot_t prot, int level) +static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) { pgprotval_t pr = pgprot_val(prot); static const char * const level_name[] = @@ -99,47 +118,47 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level) if (!pgprot_val(prot)) { /* Not present */ - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); } else { if (pr & _PAGE_USER) - seq_printf(m, "USR "); + pt_dump_cont_printf(m, dmsg, "USR "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); if (pr & _PAGE_RW) - seq_printf(m, "RW "); + pt_dump_cont_printf(m, dmsg, "RW "); else - seq_printf(m, "ro "); + pt_dump_cont_printf(m, dmsg, "ro "); if (pr & _PAGE_PWT) - seq_printf(m, "PWT "); + pt_dump_cont_printf(m, dmsg, "PWT "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); if (pr & _PAGE_PCD) - seq_printf(m, "PCD "); + pt_dump_cont_printf(m, dmsg, "PCD "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); /* Bit 9 has a different meaning on level 3 vs 4 */ if (level <= 3) { if (pr & _PAGE_PSE) - seq_printf(m, "PSE "); + pt_dump_cont_printf(m, dmsg, "PSE "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); } else { if (pr & _PAGE_PAT) - seq_printf(m, "pat "); + pt_dump_cont_printf(m, dmsg, "pat "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); } if (pr & _PAGE_GLOBAL) - seq_printf(m, "GLB "); + pt_dump_cont_printf(m, dmsg, "GLB "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); if (pr & _PAGE_NX) - seq_printf(m, "NX "); + pt_dump_cont_printf(m, dmsg, "NX "); else - seq_printf(m, "x "); + pt_dump_cont_printf(m, dmsg, "x "); } - seq_printf(m, "%s\n", level_name[level]); + pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); } /* @@ -178,7 +197,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->current_prot = new_prot; st->level = level; st->marker = address_markers; - seq_printf(m, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", + st->marker->name); } else if (prot != cur || level != st->level || st->current_address >= st->marker[1].start_address) { const char *unit = units; @@ -188,17 +208,17 @@ static void note_page(struct seq_file *m, struct pg_state *st, /* * Now print the actual finished series */ - seq_printf(m, "0x%0*lx-0x%0*lx ", - width, st->start_address, - width, st->current_address); + pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, st->current_address); delta = (st->current_address - st->start_address) >> 10; while (!(delta & 1023) && unit[1]) { delta >>= 10; unit++; } - seq_printf(m, "%9lu%c ", delta, *unit); - printk_prot(m, st->current_prot, st->level); + pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit); + printk_prot(m, st->current_prot, st->level, st->to_dmesg); /* * We print markers for special areas of address space, @@ -207,7 +227,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, */ if (st->current_address >= st->marker[1].start_address) { st->marker++; - seq_printf(m, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", + st->marker->name); } st->start_address = st->current_address; @@ -296,7 +317,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, #define pgd_none(a) pud_none(__pud(pgd_val(a))) #endif -static void walk_pgd_level(struct seq_file *m) +void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) { #ifdef CONFIG_X86_64 pgd_t *start = (pgd_t *) &init_level4_pgt; @@ -304,9 +325,12 @@ static void walk_pgd_level(struct seq_file *m) pgd_t *start = swapper_pg_dir; #endif int i; - struct pg_state st; + struct pg_state st = {}; - memset(&st, 0, sizeof(st)); + if (pgd) { + start = pgd; + st.to_dmesg = true; + } for (i = 0; i < PTRS_PER_PGD; i++) { st.current_address = normalize_addr(i * PGD_LEVEL_MULT); @@ -331,7 +355,7 @@ static void walk_pgd_level(struct seq_file *m) static int ptdump_show(struct seq_file *m, void *v) { - walk_pgd_level(m); + ptdump_walk_pgd_level(m, NULL); return 0; } -- cgit v1.2.3 From 42a5477251f0e0f33ad5f6a95c48d685ec03191e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Jan 2014 12:48:16 +0100 Subject: x86, pageattr: Export page unmapping interface We will use it in efi so expose it. Signed-off-by: Borislav Petkov Tested-by: Toshi Kani Signed-off-by: Matt Fleming --- arch/x86/include/asm/pgtable_types.h | 2 ++ arch/x86/mm/pageattr.c | 44 +++++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 13 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 1aa9ccd43223..94e40f1efdfd 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -385,6 +385,8 @@ extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); +void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, + unsigned numpages); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index b3b19f46c016..a3488689e301 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -692,6 +692,18 @@ static bool try_to_free_pmd_page(pmd_t *pmd) return true; } +static bool try_to_free_pud_page(pud_t *pud) +{ + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) + if (!pud_none(pud[i])) + return false; + + free_page((unsigned long)pud); + return true; +} + static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) { pte_t *pte = pte_offset_kernel(pmd, start); @@ -805,6 +817,16 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) */ } +static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end) +{ + pgd_t *pgd_entry = root + pgd_index(addr); + + unmap_pud_range(pgd_entry, addr, end); + + if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry))) + pgd_clear(pgd_entry); +} + static int alloc_pte_page(pmd_t *pmd) { pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); @@ -999,9 +1021,8 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, static int populate_pgd(struct cpa_data *cpa, unsigned long addr) { pgprot_t pgprot = __pgprot(_KERNPG_TABLE); - bool allocd_pgd = false; - pgd_t *pgd_entry; pud_t *pud = NULL; /* shut up gcc */ + pgd_t *pgd_entry; int ret; pgd_entry = cpa->pgd + pgd_index(addr); @@ -1015,7 +1036,6 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) return -1; set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); - allocd_pgd = true; } pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); @@ -1023,19 +1043,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { - unmap_pud_range(pgd_entry, addr, + unmap_pgd_range(cpa->pgd, addr, addr + (cpa->numpages << PAGE_SHIFT)); - - if (allocd_pgd) { - /* - * If I allocated this PUD page, I can just as well - * free it in this error path. - */ - pgd_clear(pgd_entry); - free_page((unsigned long)pud); - } return ret; } + cpa->numpages = ret; return 0; } @@ -1861,6 +1873,12 @@ out: return retval; } +void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, + unsigned numpages) +{ + unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT)); +} + /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. -- cgit v1.2.3 From 426e34cc4f6094cefe4f3175764cdf583128e7cd Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 6 Dec 2013 21:13:04 +0000 Subject: x86/mm/pageattr: Always dump the right page table in an oops Now that we have EFI-specific page tables we need to lookup the pgd when dumping those page tables, rather than assuming that swapper_pgdir is the current pgdir. Remove the double underscore prefix, which is usually reserved for static functions. Acked-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/include/asm/pgtable_types.h | 2 ++ arch/x86/mm/fault.c | 7 ++++++- arch/x86/mm/pageattr.c | 12 ++++++++---- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 1aa9ccd43223..24ae4211ba34 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -382,6 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { } * as a pte too. */ extern pte_t *lookup_address(unsigned long address, unsigned int *level); +extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + unsigned int *level); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6dea040cc3a1..6055a2001b27 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -584,8 +584,13 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, if (error_code & PF_INSTR) { unsigned int level; + pgd_t *pgd; + pte_t *pte; - pte_t *pte = lookup_address(address, &level); + pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); + pgd += pgd_index(address); + + pte = lookup_address_in_pgd(pgd, address, &level); if (pte && pte_present(*pte) && !pte_exec(*pte)) printk(nx_warning, from_kuid(&init_user_ns, current_uid())); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index b3b19f46c016..14c656f0483b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -323,8 +323,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, return prot; } -static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address, - unsigned int *level) +/* + * Lookup the page table entry for a virtual address in a specific pgd. + * Return a pointer to the entry and the level of the mapping. + */ +pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + unsigned int *level) { pud_t *pud; pmd_t *pmd; @@ -365,7 +369,7 @@ static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address, */ pte_t *lookup_address(unsigned long address, unsigned int *level) { - return __lookup_address_in_pgd(pgd_offset_k(address), address, level); + return lookup_address_in_pgd(pgd_offset_k(address), address, level); } EXPORT_SYMBOL_GPL(lookup_address); @@ -373,7 +377,7 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, unsigned int *level) { if (cpa->pgd) - return __lookup_address_in_pgd(cpa->pgd + pgd_index(address), + return lookup_address_in_pgd(cpa->pgd + pgd_index(address), address, level); return lookup_address(address, level); -- cgit v1.2.3 From 0ac09f9f8cd1fb028a48330edba6023d347d3cea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2014 17:05:26 +0100 Subject: x86, trace: Fix CR2 corruption when tracing page faults The trace_do_page_fault function trigger tracepoint and then handles the actual page fault. This could lead to error if the tracepoint caused page fault. The original cr2 value gets lost and the original page fault handler kills current process with SIGSEGV. This happens if you record page faults with callchain data, the user part of it will cause tracepoint handler to page fault: # perf record -g -e exceptions:page_fault_user ls Fixing this by saving the original cr2 value and using it after tracepoint handler is done. v2: Moving the cr2 read before exception_enter, because it could trigger tracepoint as well. Reported-by: Arnaldo Carvalho de Melo Reported-by: Vince Weaver Tested-by: Vince Weaver Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Seiji Aguchi Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1402211701380.6395@vincent-weaver-1.um.maine.edu Link: http://lkml.kernel.org/r/20140228160526.GD1133@krava.brq.redhat.com --- arch/x86/mm/fault.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6dea040cc3a1..e7fa28bf3262 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1022,11 +1022,11 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * routines. */ static void __kprobes -__do_page_fault(struct pt_regs *regs, unsigned long error_code) +__do_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { struct vm_area_struct *vma; struct task_struct *tsk; - unsigned long address; struct mm_struct *mm; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -1034,9 +1034,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) tsk = current; mm = tsk->mm; - /* Get the faulting address: */ - address = read_cr2(); - /* * Detect and handle instructions that would cause a page fault for * both a tracked kernel page and a userspace page. @@ -1252,9 +1249,11 @@ dotraplinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; + /* Get the faulting address: */ + unsigned long address = read_cr2(); prev_state = exception_enter(); - __do_page_fault(regs, error_code); + __do_page_fault(regs, error_code, address); exception_exit(prev_state); } @@ -1271,9 +1270,16 @@ dotraplinkage void __kprobes trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; + /* + * The exception_enter and tracepoint processing could + * trigger another page faults (user space callchain + * reading) and destroy the original cr2 value, so read + * the faulting address now. + */ + unsigned long address = read_cr2(); prev_state = exception_enter(); trace_page_fault_entries(regs, error_code); - __do_page_fault(regs, error_code); + __do_page_fault(regs, error_code, address); exception_exit(prev_state); } -- cgit v1.2.3 From d4078e232267ff53f3b030b9698a3c001db4dbec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Mar 2014 14:07:49 +0100 Subject: x86, trace: Further robustify CR2 handling vs tracing Building on commit 0ac09f9f8cd1 ("x86, trace: Fix CR2 corruption when tracing page faults") this patch addresses another few issues: - Now that read_cr2() is lifted into trace_do_page_fault(), we should pass the address to trace_page_fault_entries() to avoid it re-reading a potentially changed cr2. - Put both trace_do_page_fault() and trace_page_fault_entries() under CONFIG_TRACING. - Mark both fault entry functions {,trace_}do_page_fault() as notrace to avoid getting __mcount or other function entry trace callbacks before we've observed CR2. - Mark __do_page_fault() as noinline to guarantee the function tracer does get to see the fault. Cc: Cc: Acked-by: Steven Rostedt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140306145300.GO9987@twins.programming.kicks-ass.net Signed-off-by: H. Peter Anvin --- arch/x86/mm/fault.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e7fa28bf3262..a10c8c792161 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1020,8 +1020,12 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. + * + * This function must have noinline because both callers + * {,trace_}do_page_fault() have notrace on. Having this an actual function + * guarantees there's a function trace entry. */ -static void __kprobes +static void __kprobes noinline __do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { @@ -1245,31 +1249,38 @@ good_area: up_read(&mm->mmap_sem); } -dotraplinkage void __kprobes +dotraplinkage void __kprobes notrace do_page_fault(struct pt_regs *regs, unsigned long error_code) { + unsigned long address = read_cr2(); /* Get the faulting address */ enum ctx_state prev_state; - /* Get the faulting address: */ - unsigned long address = read_cr2(); + + /* + * We must have this function tagged with __kprobes, notrace and call + * read_cr2() before calling anything else. To avoid calling any kind + * of tracing machinery before we've observed the CR2 value. + * + * exception_{enter,exit}() contain all sorts of tracepoints. + */ prev_state = exception_enter(); __do_page_fault(regs, error_code, address); exception_exit(prev_state); } -static void trace_page_fault_entries(struct pt_regs *regs, +#ifdef CONFIG_TRACING +static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, unsigned long error_code) { if (user_mode(regs)) - trace_page_fault_user(read_cr2(), regs, error_code); + trace_page_fault_user(address, regs, error_code); else - trace_page_fault_kernel(read_cr2(), regs, error_code); + trace_page_fault_kernel(address, regs, error_code); } -dotraplinkage void __kprobes +dotraplinkage void __kprobes notrace trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { - enum ctx_state prev_state; /* * The exception_enter and tracepoint processing could * trigger another page faults (user space callchain @@ -1277,9 +1288,11 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) * the faulting address now. */ unsigned long address = read_cr2(); + enum ctx_state prev_state; prev_state = exception_enter(); - trace_page_fault_entries(regs, error_code); + trace_page_fault_entries(address, regs, error_code); __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +#endif /* CONFIG_TRACING */ -- cgit v1.2.3 From b82ad3d3944193e5f95319c30fd24e39d0d7b510 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Mar 2014 15:13:04 +0100 Subject: x86, pageattr: Correct WBINVD spelling in comment It is WBINVD, for INValiDate and not "wbindv". Use caps for instruction names, while at it. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1394633584-5509-4-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index b3b19f46c016..ba957c9b677c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1377,10 +1377,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cache = cache_attr(mask_set); /* - * On success we use clflush, when the CPU supports it to - * avoid the wbindv. If the CPU does not support it and in the + * On success we use CLFLUSH, when the CPU supports it to + * avoid the WBINVD. If the CPU does not support it and in the * error case we fall back to cpa_flush_all (which uses - * wbindv): + * WBINVD): */ if (!ret && cpu_has_clflush) { if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { -- cgit v1.2.3 From 6b550f6f2004017f1b4633d2c9e39b610bfe84f0 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Mon, 7 Apr 2014 15:39:46 -0700 Subject: x86/mm: sparse warning fix for early_memremap This patch series takes the common bits from the x86 early ioremap implementation and creates a generic implementation which may be used by other architectures. The early ioremap interfaces are intended for situations where boot code needs to make temporary virtual mappings before the normal ioremap interfaces are available. Typically, this means before paging_init() has run. This patch (of 6): There's a lot of sparse warnings for code like below: void *a = early_memremap(phys_addr, size); early_memremap intend to map kernel memory with ioremap facility, the return pointer should be a kernel ram pointer instead of iomem one. For making the function clearer and supressing sparse warnings this patch do below two things: 1. cast to (__force void *) for the return value of early_memremap 2. add early_memunmap function and pass (__force void __iomem *) to iounmap From Boris: "Ingo told me yesterday, it makes sense too. I'd guess we can try it. FWIW, all callers of early_memremap use the memory they get remapped as normal memory so we should be safe" Signed-off-by: Dave Young Signed-off-by: Mark Salter Acked-by: H. Peter Anvin Cc: Borislav Petkov Cc: Catalin Marinas Cc: Will Deacon Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/io.h | 3 ++- arch/x86/mm/ioremap.c | 10 +++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 91d9c69a629e..7cec9ef3a73a 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -325,9 +325,10 @@ extern void early_ioremap_init(void); extern void early_ioremap_reset(void); extern void __iomem *early_ioremap(resource_size_t phys_addr, unsigned long size); -extern void __iomem *early_memremap(resource_size_t phys_addr, +extern void *early_memremap(resource_size_t phys_addr, unsigned long size); extern void early_iounmap(void __iomem *addr, unsigned long size); +extern void early_memunmap(void *addr, unsigned long size); extern void fixup_early_ioremap(void); extern bool is_early_ioremap_ptep(pte_t *ptep); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 799580cabc78..bbb450412810 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -562,10 +562,9 @@ early_ioremap(resource_size_t phys_addr, unsigned long size) } /* Remap memory */ -void __init __iomem * -early_memremap(resource_size_t phys_addr, unsigned long size) +void __init *early_memremap(resource_size_t phys_addr, unsigned long size) { - return __early_ioremap(phys_addr, size, PAGE_KERNEL); + return (__force void *)__early_ioremap(phys_addr, size, PAGE_KERNEL); } void __init early_iounmap(void __iomem *addr, unsigned long size) @@ -620,3 +619,8 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } prev_map[slot] = NULL; } + +void __init early_memunmap(void *addr, unsigned long size) +{ + early_iounmap((__force void __iomem *)addr, size); +} -- cgit v1.2.3 From 5b7c73e00968c7fdf908c3ced31e1cc83c01ba14 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 7 Apr 2014 15:39:49 -0700 Subject: x86: use generic early_ioremap Move x86 over to the generic early ioremap implementation. Signed-off-by: Mark Salter Acked-by: H. Peter Anvin Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Young Cc: Will Deacon Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/fixmap.h | 6 ++ arch/x86/include/asm/io.h | 15 +-- arch/x86/mm/ioremap.c | 228 +----------------------------------------- arch/x86/mm/pgtable_32.c | 2 +- 6 files changed, 13 insertions(+), 240 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f73071742975..5b8ec0f53b57 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -43,6 +43,7 @@ config X86 select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS if !SWIOTLB select HAVE_KRETPROBES + select GENERIC_EARLY_IOREMAP select HAVE_OPTPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4acddc43ee0c..3ca9762e1649 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,5 +5,6 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h +generic-y += early_ioremap.h generic-y += cputime.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 8dcd35c4c787..43f482a0db37 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -163,5 +163,11 @@ static inline void __set_fixmap(enum fixed_addresses idx, #include +#define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags) +#define __late_clear_fixmap(idx) __set_fixmap(idx, 0, __pgprot(0)) + +void __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 7cec9ef3a73a..b8237d8a1e0c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -39,6 +39,7 @@ #include #include #include +#include #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -316,20 +317,6 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, unsigned long prot_val); extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); -/* - * early_ioremap() and early_iounmap() are for temporary early boot-time - * mappings, before the real ioremap() is functional. - * A boot-time mapping is currently limited to at most 16 pages. - */ -extern void early_ioremap_init(void); -extern void early_ioremap_reset(void); -extern void __iomem *early_ioremap(resource_size_t phys_addr, - unsigned long size); -extern void *early_memremap(resource_size_t phys_addr, - unsigned long size); -extern void early_iounmap(void __iomem *addr, unsigned long size); -extern void early_memunmap(void *addr, unsigned long size); -extern void fixup_early_ioremap(void); extern bool is_early_ioremap_ptep(pte_t *ptep); #ifdef CONFIG_XEN diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index bbb450412810..597ac155c91c 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -328,17 +328,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr) return; } -static int __initdata early_ioremap_debug; - -static int __init early_ioremap_debug_setup(char *str) -{ - early_ioremap_debug = 1; - - return 0; -} -early_param("early_ioremap_debug", early_ioremap_debug_setup); - -static __initdata int after_paging_init; static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) @@ -362,18 +351,11 @@ bool __init is_early_ioremap_ptep(pte_t *ptep) return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; } -static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; - void __init early_ioremap_init(void) { pmd_t *pmd; - int i; - if (early_ioremap_debug) - printk(KERN_INFO "early_ioremap_init()\n"); - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); + early_ioremap_setup(); pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); @@ -402,13 +384,8 @@ void __init early_ioremap_init(void) } } -void __init early_ioremap_reset(void) -{ - after_paging_init = 1; -} - -static void __init __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) +void __init __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) { unsigned long addr = __fix_to_virt(idx); pte_t *pte; @@ -425,202 +402,3 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, pte_clear(&init_mm, addr, pte); __flush_tlb_one(addr); } - -static inline void __init early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t prot) -{ - if (after_paging_init) - __set_fixmap(idx, phys, prot); - else - __early_set_fixmap(idx, phys, prot); -} - -static inline void __init early_clear_fixmap(enum fixed_addresses idx) -{ - if (after_paging_init) - clear_fixmap(idx); - else - __early_set_fixmap(idx, 0, __pgprot(0)); -} - -static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; -static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; - -void __init fixup_early_ioremap(void) -{ - int i; - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (prev_map[i]) { - WARN_ON(1); - break; - } - } - - early_ioremap_init(); -} - -static int __init check_early_ioremap_leak(void) -{ - int count = 0; - int i; - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - if (prev_map[i]) - count++; - - if (!count) - return 0; - WARN(1, KERN_WARNING - "Debug warning: early ioremap leak of %d areas detected.\n", - count); - printk(KERN_WARNING - "please boot with early_ioremap_debug and report the dmesg.\n"); - - return 1; -} -late_initcall(check_early_ioremap_leak); - -static void __init __iomem * -__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) -{ - unsigned long offset; - resource_size_t last_addr; - unsigned int nrpages; - enum fixed_addresses idx; - int i, slot; - - WARN_ON(system_state != SYSTEM_BOOTING); - - slot = -1; - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (!prev_map[i]) { - slot = i; - break; - } - } - - if (slot < 0) { - printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n", - __func__, (u64)phys_addr, size); - WARN_ON(1); - return NULL; - } - - if (early_ioremap_debug) { - printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ", - __func__, (u64)phys_addr, size, slot); - dump_stack(); - } - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) { - WARN_ON(1); - return NULL; - } - - prev_size[slot] = size; - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr + 1) - phys_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. - */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) { - WARN_ON(1); - return NULL; - } - - /* - * Ok, go for it.. - */ - idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - while (nrpages > 0) { - early_set_fixmap(idx, phys_addr, prot); - phys_addr += PAGE_SIZE; - --idx; - --nrpages; - } - if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); - - prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); - return prev_map[slot]; -} - -/* Remap an IO device */ -void __init __iomem * -early_ioremap(resource_size_t phys_addr, unsigned long size) -{ - return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); -} - -/* Remap memory */ -void __init *early_memremap(resource_size_t phys_addr, unsigned long size) -{ - return (__force void *)__early_ioremap(phys_addr, size, PAGE_KERNEL); -} - -void __init early_iounmap(void __iomem *addr, unsigned long size) -{ - unsigned long virt_addr; - unsigned long offset; - unsigned int nrpages; - enum fixed_addresses idx; - int i, slot; - - slot = -1; - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (prev_map[i] == addr) { - slot = i; - break; - } - } - - if (slot < 0) { - printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n", - addr, size); - WARN_ON(1); - return; - } - - if (prev_size[slot] != size) { - printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", - addr, size, slot, prev_size[slot]); - WARN_ON(1); - return; - } - - if (early_ioremap_debug) { - printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, - size, slot); - dump_stack(); - } - - virt_addr = (unsigned long)addr; - if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) { - WARN_ON(1); - return; - } - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; - - idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - while (nrpages > 0) { - early_clear_fixmap(idx); - --idx; - --nrpages; - } - prev_map[slot] = NULL; -} - -void __init early_memunmap(void *addr, unsigned long size) -{ - early_iounmap((__force void __iomem *)addr, size); -} diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index a69bcb8c7621..4dd8cf652579 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -127,7 +127,7 @@ static int __init parse_reservetop(char *arg) address = memparse(arg, &arg); reserve_top_address(address); - fixup_early_ioremap(); + early_ioremap_init(); return 0; } early_param("reservetop", parse_reservetop); -- cgit v1.2.3 From d0057ca4c1fe73c05a9e077cc7691217370b3283 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 8 Apr 2014 16:04:13 -0700 Subject: arch/x86/mm/kmemcheck/kmemcheck.c: use kstrtoint() instead of sscanf() Kmemcheck should use the preferred interface for parsing command line arguments, kstrto*(), rather than sscanf() itself. Use it appropriately. Signed-off-by: David Rientjes Cc: Vegard Nossum Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/kmemcheck/kmemcheck.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index d87dd6d042d6..dd89a13f1051 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -78,10 +78,16 @@ early_initcall(kmemcheck_init); */ static int __init param_kmemcheck(char *str) { + int val; + int ret; + if (!str) return -EINVAL; - sscanf(str, "%d", &kmemcheck_enabled); + ret = kstrtoint(str, 0, &val); + if (ret) + return ret; + kmemcheck_enabled = val; return 0; } -- cgit v1.2.3