diff options
author | Ard Biesheuvel <ardb@kernel.org> | 2022-10-20 16:54:33 +0300 |
---|---|---|
committer | Ard Biesheuvel <ardb@kernel.org> | 2023-09-11 11:13:17 +0300 |
commit | cf8e8658100d4eae80ce9b21f7a81cb024dd5057 (patch) | |
tree | 31d3b640bebf97c33d354768fc44dfd532c2df81 /arch/ia64/mm | |
parent | a0334bf78b95532cec54f56b53e8ae1bfe7e1ca1 (diff) | |
download | linux-cf8e8658100d4eae80ce9b21f7a81cb024dd5057.tar.xz |
arch: Remove Itanium (IA-64) architecture
The Itanium architecture is obsolete, and an informal survey [0] reveals
that any residual use of Itanium hardware in production is mostly HP-UX
or OpenVMS based. The use of Linux on Itanium appears to be limited to
enthusiasts who occasionally boot a fresh Linux kernel to see whether
things are still working as intended, and perhaps to churn out some
distro packages that are rarely used in practice.
None of the original companies behind Itanium still produce or support
any hardware or software for the architecture, and it is listed as
'Orphaned' in the MAINTAINERS file, as apparently, none of the engineers
that contributed on behalf of those companies (nor anyone else, for that
matter) have been willing to support or maintain the architecture
upstream or even be responsible for applying the odd fix. The Intel
firmware team removed all IA-64 support from the Tianocore/EDK2
reference implementation of EFI in 2018. (Itanium is the original
architecture for which EFI was developed, and the way Linux supports it
deviates significantly from other architectures.) Some distros, such as
Debian and Gentoo, still maintain [unofficial] ia64 ports, but many
dropped support years ago.
While the argument is being made [1] that there is a 'for the common
good' angle to being able to build and run existing projects such as the
Grid Community Toolkit [2] on Itanium for interoperability testing, the
fact remains that none of those projects are known to be deployed on
Linux/ia64, and very few people actually have access to such a system in
the first place. Even if there were ways imaginable in which Linux/ia64
could be put to good use today, what matters is whether anyone is
actually doing that, and this does not appear to be the case.
There are no emulators widely available, and so boot testing Itanium is
generally infeasible for ordinary contributors. GCC still supports IA-64
but its compile farm [3] no longer has any IA-64 machines. GLIBC would
like to get rid of IA-64 [4] too because it would permit some overdue
code cleanups. In summary, the benefits to the ecosystem of having IA-64
be part of it are mostly theoretical, whereas the maintenance overhead
of keeping it supported is real.
So let's rip off the band aid, and remove the IA-64 arch code entirely.
This follows the timeline proposed by the Debian/ia64 maintainer [5],
which removes support in a controlled manner, leaving IA-64 in a known
good state in the most recent LTS release. Other projects will follow
once the kernel support is removed.
[0] https://lore.kernel.org/all/CAMj1kXFCMh_578jniKpUtx_j8ByHnt=s7S+yQ+vGbKt9ud7+kQ@mail.gmail.com/
[1] https://lore.kernel.org/all/0075883c-7c51-00f5-2c2d-5119c1820410@web.de/
[2] https://gridcf.org/gct-docs/latest/index.html
[3] https://cfarm.tetaneutral.net/machines/list/
[4] https://lore.kernel.org/all/87bkiilpc4.fsf@mid.deneb.enyo.de/
[5] https://lore.kernel.org/all/ff58a3e76e5102c94bb5946d99187b358def688a.camel@physik.fu-berlin.de/
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Diffstat (limited to 'arch/ia64/mm')
-rw-r--r-- | arch/ia64/mm/Makefile | 11 | ||||
-rw-r--r-- | arch/ia64/mm/contig.c | 208 | ||||
-rw-r--r-- | arch/ia64/mm/discontig.c | 635 | ||||
-rw-r--r-- | arch/ia64/mm/extable.c | 24 | ||||
-rw-r--r-- | arch/ia64/mm/fault.c | 251 | ||||
-rw-r--r-- | arch/ia64/mm/hugetlbpage.c | 186 | ||||
-rw-r--r-- | arch/ia64/mm/init.c | 532 | ||||
-rw-r--r-- | arch/ia64/mm/ioremap.c | 94 | ||||
-rw-r--r-- | arch/ia64/mm/numa.c | 80 | ||||
-rw-r--r-- | arch/ia64/mm/tlb.c | 591 |
10 files changed, 0 insertions, 2612 deletions
diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile deleted file mode 100644 index c03f63c62ac4..000000000000 --- a/arch/ia64/mm/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the ia64-specific parts of the memory manager. -# - -obj-y := init.o fault.o tlb.o extable.o ioremap.o - -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_NUMA) += numa.o -obj-$(CONFIG_SPARSEMEM) += discontig.o -obj-$(CONFIG_FLATMEM) += contig.o diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c deleted file mode 100644 index 1e9eaa107eb7..000000000000 --- a/arch/ia64/mm/contig.c +++ /dev/null @@ -1,208 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * Stephane Eranian <eranian@hpl.hp.com> - * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com> - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> - * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved. - * - * Routines used by ia64 machines with contiguous (or virtually contiguous) - * memory. - */ -#include <linux/efi.h> -#include <linux/memblock.h> -#include <linux/mm.h> -#include <linux/nmi.h> -#include <linux/swap.h> -#include <linux/sizes.h> - -#include <asm/efi.h> -#include <asm/meminit.h> -#include <asm/sections.h> -#include <asm/mca.h> - -/* physical address where the bootmem map is located */ -unsigned long bootmap_start; - -#ifdef CONFIG_SMP -static void *cpu_data; -/** - * per_cpu_init - setup per-cpu variables - * - * Allocate and setup per-cpu data areas. 
- */ -void *per_cpu_init(void) -{ - static bool first_time = true; - void *cpu0_data = __cpu0_per_cpu; - unsigned int cpu; - - if (!first_time) - goto skip; - first_time = false; - - /* - * get_free_pages() cannot be used before cpu_init() done. - * BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs - * to avoid that AP calls get_zeroed_page(). - */ - for_each_possible_cpu(cpu) { - void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start; - - memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start; - per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; - - /* - * percpu area for cpu0 is moved from the __init area - * which is setup by head.S and used till this point. - * Update ar.k3. This move is ensures that percpu - * area for cpu0 is on the correct node and its - * virtual address isn't insanely far from other - * percpu areas which is important for congruent - * percpu allocator. - */ - if (cpu == 0) - ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) - - (unsigned long)__per_cpu_start); - - cpu_data += PERCPU_PAGE_SIZE; - } -skip: - return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; -} - -static inline __init void -alloc_per_cpu_data(void) -{ - size_t size = PERCPU_PAGE_SIZE * num_possible_cpus(); - - cpu_data = memblock_alloc_from(size, PERCPU_PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!cpu_data) - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", - __func__, size, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); -} - -/** - * setup_per_cpu_areas - setup percpu areas - * - * Arch code has already allocated and initialized percpu areas. All - * this function has to do is to teach the determined layout to the - * dynamic percpu allocator, which happens to be more complex than - * creating whole new ones using helpers. 
- */ -void __init -setup_per_cpu_areas(void) -{ - struct pcpu_alloc_info *ai; - struct pcpu_group_info *gi; - unsigned int cpu; - ssize_t static_size, reserved_size, dyn_size; - - ai = pcpu_alloc_alloc_info(1, num_possible_cpus()); - if (!ai) - panic("failed to allocate pcpu_alloc_info"); - gi = &ai->groups[0]; - - /* units are assigned consecutively to possible cpus */ - for_each_possible_cpu(cpu) - gi->cpu_map[gi->nr_units++] = cpu; - - /* set parameters */ - static_size = __per_cpu_end - __per_cpu_start; - reserved_size = PERCPU_MODULE_RESERVE; - dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; - if (dyn_size < 0) - panic("percpu area overflow static=%zd reserved=%zd\n", - static_size, reserved_size); - - ai->static_size = static_size; - ai->reserved_size = reserved_size; - ai->dyn_size = dyn_size; - ai->unit_size = PERCPU_PAGE_SIZE; - ai->atom_size = PAGE_SIZE; - ai->alloc_size = PERCPU_PAGE_SIZE; - - pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]); - pcpu_free_alloc_info(ai); -} -#else -#define alloc_per_cpu_data() do { } while (0) -#endif /* CONFIG_SMP */ - -/** - * find_memory - setup memory map - * - * Walk the EFI memory map and find usable memory for the system, taking - * into account reserved areas. 
- */ -void __init -find_memory (void) -{ - reserve_memory(); - - /* first find highest page frame number */ - min_low_pfn = ~0UL; - max_low_pfn = 0; - efi_memmap_walk(find_max_min_low_pfn, NULL); - max_pfn = max_low_pfn; - - memblock_add_node(0, PFN_PHYS(max_low_pfn), 0, MEMBLOCK_NONE); - - find_initrd(); - - alloc_per_cpu_data(); -} - -static int __init find_largest_hole(u64 start, u64 end, void *arg) -{ - u64 *max_gap = arg; - - static u64 last_end = PAGE_OFFSET; - - /* NOTE: this algorithm assumes efi memmap table is ordered */ - - if (*max_gap < (start - last_end)) - *max_gap = start - last_end; - last_end = end; - return 0; -} - -static void __init verify_gap_absence(void) -{ - unsigned long max_gap; - - /* Forbid FLATMEM if hole is > than 1G */ - efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); - if (max_gap >= SZ_1G) - panic("Cannot use FLATMEM with %ldMB hole\n" - "Please switch over to SPARSEMEM\n", - (max_gap >> 20)); -} - -/* - * Set up the page tables. - */ - -void __init -paging_init (void) -{ - unsigned long max_dma; - unsigned long max_zone_pfns[MAX_NR_ZONES]; - - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - max_zone_pfns[ZONE_DMA32] = max_dma; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - - verify_gap_absence(); - - free_area_init(max_zone_pfns); - zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); -} diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c deleted file mode 100644 index 73d0db36edb6..000000000000 --- a/arch/ia64/mm/discontig.c +++ /dev/null @@ -1,635 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved. - * Copyright (c) 2001 Intel Corp. - * Copyright (c) 2001 Tony Luck <tony.luck@intel.com> - * Copyright (c) 2002 NEC Corp. 
- * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com> - * Copyright (c) 2004 Silicon Graphics, Inc - * Russ Anderson <rja@sgi.com> - * Jesse Barnes <jbarnes@sgi.com> - * Jack Steiner <steiner@sgi.com> - */ - -/* - * Platform initialization for Discontig Memory - */ - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/nmi.h> -#include <linux/swap.h> -#include <linux/memblock.h> -#include <linux/acpi.h> -#include <linux/efi.h> -#include <linux/nodemask.h> -#include <linux/slab.h> -#include <asm/efi.h> -#include <asm/tlb.h> -#include <asm/meminit.h> -#include <asm/numa.h> -#include <asm/sections.h> - -/* - * Track per-node information needed to setup the boot memory allocator, the - * per-node areas, and the real VM. - */ -struct early_node_data { - struct ia64_node_data *node_data; - unsigned long pernode_addr; - unsigned long pernode_size; - unsigned long min_pfn; - unsigned long max_pfn; -}; - -static struct early_node_data mem_data[MAX_NUMNODES] __initdata; -static nodemask_t memory_less_mask __initdata; - -pg_data_t *pgdat_list[MAX_NUMNODES]; - -/* - * To prevent cache aliasing effects, align per-node structures so that they - * start at addresses that are strided by node number. - */ -#define MAX_NODE_ALIGN_OFFSET (32 * 1024 * 1024) -#define NODEDATA_ALIGN(addr, node) \ - ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + \ - (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1))) - -/** - * build_node_maps - callback to setup mem_data structs for each node - * @start: physical start of range - * @len: length of range - * @node: node where this range resides - * - * Detect extents of each piece of memory that we wish to - * treat as a virtually contiguous block (i.e. each node). Each such block - * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down - * if necessary. Any non-existent pages will simply be part of the virtual - * memmap. 
- */ -static int __init build_node_maps(unsigned long start, unsigned long len, - int node) -{ - unsigned long spfn, epfn, end = start + len; - - epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT; - spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT; - - if (!mem_data[node].min_pfn) { - mem_data[node].min_pfn = spfn; - mem_data[node].max_pfn = epfn; - } else { - mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn); - mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn); - } - - return 0; -} - -/** - * early_nr_cpus_node - return number of cpus on a given node - * @node: node to check - * - * Count the number of cpus on @node. We can't use nr_cpus_node() yet because - * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been - * called yet. Note that node 0 will also count all non-existent cpus. - */ -static int early_nr_cpus_node(int node) -{ - int cpu, n = 0; - - for_each_possible_early_cpu(cpu) - if (node == node_cpuid[cpu].nid) - n++; - - return n; -} - -/** - * compute_pernodesize - compute size of pernode data - * @node: the node id. - */ -static unsigned long compute_pernodesize(int node) -{ - unsigned long pernodesize = 0, cpus; - - cpus = early_nr_cpus_node(node); - pernodesize += PERCPU_PAGE_SIZE * cpus; - pernodesize += node * L1_CACHE_BYTES; - pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); - pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); - pernodesize = PAGE_ALIGN(pernodesize); - return pernodesize; -} - -/** - * per_cpu_node_setup - setup per-cpu areas on each node - * @cpu_data: per-cpu area on this node - * @node: node to setup - * - * Copy the static per-cpu data into the region we just set aside and then - * setup __per_cpu_offset for each CPU on this node. Return a pointer to - * the end of the area. - */ -static void *per_cpu_node_setup(void *cpu_data, int node) -{ -#ifdef CONFIG_SMP - int cpu; - - for_each_possible_early_cpu(cpu) { - void *src = cpu == 0 ? 
__cpu0_per_cpu : __phys_per_cpu_start; - - if (node != node_cpuid[cpu].nid) - continue; - - memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char *)__va(cpu_data) - - __per_cpu_start; - - /* - * percpu area for cpu0 is moved from the __init area - * which is setup by head.S and used till this point. - * Update ar.k3. This move is ensures that percpu - * area for cpu0 is on the correct node and its - * virtual address isn't insanely far from other - * percpu areas which is important for congruent - * percpu allocator. - */ - if (cpu == 0) - ia64_set_kr(IA64_KR_PER_CPU_DATA, - (unsigned long)cpu_data - - (unsigned long)__per_cpu_start); - - cpu_data += PERCPU_PAGE_SIZE; - } -#endif - return cpu_data; -} - -#ifdef CONFIG_SMP -/** - * setup_per_cpu_areas - setup percpu areas - * - * Arch code has already allocated and initialized percpu areas. All - * this function has to do is to teach the determined layout to the - * dynamic percpu allocator, which happens to be more complex than - * creating whole new ones using helpers. 
- */ -void __init setup_per_cpu_areas(void) -{ - struct pcpu_alloc_info *ai; - struct pcpu_group_info *gi; - unsigned int *cpu_map; - void *base; - unsigned long base_offset; - unsigned int cpu; - ssize_t static_size, reserved_size, dyn_size; - int node, prev_node, unit, nr_units; - - ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids); - if (!ai) - panic("failed to allocate pcpu_alloc_info"); - cpu_map = ai->groups[0].cpu_map; - - /* determine base */ - base = (void *)ULONG_MAX; - for_each_possible_cpu(cpu) - base = min(base, - (void *)(__per_cpu_offset[cpu] + __per_cpu_start)); - base_offset = (void *)__per_cpu_start - base; - - /* build cpu_map, units are grouped by node */ - unit = 0; - for_each_node(node) - for_each_possible_cpu(cpu) - if (node == node_cpuid[cpu].nid) - cpu_map[unit++] = cpu; - nr_units = unit; - - /* set basic parameters */ - static_size = __per_cpu_end - __per_cpu_start; - reserved_size = PERCPU_MODULE_RESERVE; - dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; - if (dyn_size < 0) - panic("percpu area overflow static=%zd reserved=%zd\n", - static_size, reserved_size); - - ai->static_size = static_size; - ai->reserved_size = reserved_size; - ai->dyn_size = dyn_size; - ai->unit_size = PERCPU_PAGE_SIZE; - ai->atom_size = PAGE_SIZE; - ai->alloc_size = PERCPU_PAGE_SIZE; - - /* - * CPUs are put into groups according to node. Walk cpu_map - * and create new groups at node boundaries. - */ - prev_node = NUMA_NO_NODE; - ai->nr_groups = 0; - for (unit = 0; unit < nr_units; unit++) { - cpu = cpu_map[unit]; - node = node_cpuid[cpu].nid; - - if (node == prev_node) { - gi->nr_units++; - continue; - } - prev_node = node; - - gi = &ai->groups[ai->nr_groups++]; - gi->nr_units = 1; - gi->base_offset = __per_cpu_offset[cpu] + base_offset; - gi->cpu_map = &cpu_map[unit]; - } - - pcpu_setup_first_chunk(ai, base); - pcpu_free_alloc_info(ai); -} -#endif - -/** - * fill_pernode - initialize pernode data. - * @node: the node id. 
- * @pernode: physical address of pernode data - * @pernodesize: size of the pernode data - */ -static void __init fill_pernode(int node, unsigned long pernode, - unsigned long pernodesize) -{ - void *cpu_data; - int cpus = early_nr_cpus_node(node); - - mem_data[node].pernode_addr = pernode; - mem_data[node].pernode_size = pernodesize; - memset(__va(pernode), 0, pernodesize); - - cpu_data = (void *)pernode; - pernode += PERCPU_PAGE_SIZE * cpus; - pernode += node * L1_CACHE_BYTES; - - pgdat_list[node] = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - mem_data[node].node_data = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - cpu_data = per_cpu_node_setup(cpu_data, node); - - return; -} - -/** - * find_pernode_space - allocate memory for memory map and per-node structures - * @start: physical start of range - * @len: length of range - * @node: node where this range resides - * - * This routine reserves space for the per-cpu data struct, the list of - * pg_data_ts and the per-node data struct. Each node will have something like - * the following in the first chunk of addr. space large enough to hold it. - * - * ________________________ - * | | - * |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first - * | PERCPU_PAGE_SIZE * | start and length big enough - * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus. - * |------------------------| - * | local pg_data_t * | - * |------------------------| - * | local ia64_node_data | - * |------------------------| - * | ??? | - * |________________________| - * - * Once this space has been set aside, the bootmem maps are initialized. We - * could probably move the allocation of the per-cpu and ia64_node_data space - * outside of this function and use alloc_bootmem_node(), but doing it here - * is straightforward and we get the alignments we want so... 
- */ -static int __init find_pernode_space(unsigned long start, unsigned long len, - int node) -{ - unsigned long spfn, epfn; - unsigned long pernodesize = 0, pernode; - - spfn = start >> PAGE_SHIFT; - epfn = (start + len) >> PAGE_SHIFT; - - /* - * Make sure this memory falls within this node's usable memory - * since we may have thrown some away in build_maps(). - */ - if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn) - return 0; - - /* Don't setup this node's local space twice... */ - if (mem_data[node].pernode_addr) - return 0; - - /* - * Calculate total size needed, incl. what's necessary - * for good alignment and alias prevention. - */ - pernodesize = compute_pernodesize(node); - pernode = NODEDATA_ALIGN(start, node); - - /* Is this range big enough for what we want to store here? */ - if (start + len > (pernode + pernodesize)) - fill_pernode(node, pernode, pernodesize); - - return 0; -} - -/** - * reserve_pernode_space - reserve memory for per-node space - * - * Reserve the space used by the bootmem maps & per-node space in the boot - * allocator so that when we actually create the real mem maps we don't - * use their memory. - */ -static void __init reserve_pernode_space(void) -{ - unsigned long base, size; - int node; - - for_each_online_node(node) { - if (node_isset(node, memory_less_mask)) - continue; - - /* Now the per-node space */ - size = mem_data[node].pernode_size; - base = __pa(mem_data[node].pernode_addr); - memblock_reserve(base, size); - } -} - -static void scatter_node_data(void) -{ - pg_data_t **dst; - int node; - - /* - * for_each_online_node() can't be used at here. - * node_online_map is not set for hot-added nodes at this time, - * because we are halfway through initialization of the new node's - * structures. If for_each_online_node() is used, a new node's - * pg_data_ptrs will be not initialized. Instead of using it, - * pgdat_list[] is checked. 
- */ - for_each_node(node) { - if (pgdat_list[node]) { - dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs; - memcpy(dst, pgdat_list, sizeof(pgdat_list)); - } - } -} - -/** - * initialize_pernode_data - fixup per-cpu & per-node pointers - * - * Each node's per-node area has a copy of the global pg_data_t list, so - * we copy that to each node here, as well as setting the per-cpu pointer - * to the local node data structure. - */ -static void __init initialize_pernode_data(void) -{ - int cpu, node; - - scatter_node_data(); - -#ifdef CONFIG_SMP - /* Set the node_data pointer for each per-cpu struct */ - for_each_possible_early_cpu(cpu) { - node = node_cpuid[cpu].nid; - per_cpu(ia64_cpu_info, cpu).node_data = - mem_data[node].node_data; - } -#else - { - struct cpuinfo_ia64 *cpu0_cpu_info; - cpu = 0; - node = node_cpuid[cpu].nid; - cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + - ((char *)&ia64_cpu_info - __per_cpu_start)); - cpu0_cpu_info->node_data = mem_data[node].node_data; - } -#endif /* CONFIG_SMP */ -} - -/** - * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit - * node but fall back to any other node when __alloc_bootmem_node fails - * for best. 
- * @nid: node id - * @pernodesize: size of this node's pernode data - */ -static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) -{ - void *ptr = NULL; - u8 best = 0xff; - int bestnode = NUMA_NO_NODE, node, anynode = 0; - - for_each_online_node(node) { - if (node_isset(node, memory_less_mask)) - continue; - else if (node_distance(nid, node) < best) { - best = node_distance(nid, node); - bestnode = node; - } - anynode = node; - } - - if (bestnode == NUMA_NO_NODE) - bestnode = anynode; - - ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE, - __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_ACCESSIBLE, - bestnode); - if (!ptr) - panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%lx\n", - __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode, - __pa(MAX_DMA_ADDRESS)); - - return ptr; -} - -/** - * memory_less_nodes - allocate and initialize CPU only nodes pernode - * information. - */ -static void __init memory_less_nodes(void) -{ - unsigned long pernodesize; - void *pernode; - int node; - - for_each_node_mask(node, memory_less_mask) { - pernodesize = compute_pernodesize(node); - pernode = memory_less_node_alloc(node, pernodesize); - fill_pernode(node, __pa(pernode), pernodesize); - } - - return; -} - -/** - * find_memory - walk the EFI memory map and setup the bootmem allocator - * - * Called early in boot to setup the bootmem allocator, and to - * allocate the per-cpu and per-node structures. 
- */ -void __init find_memory(void) -{ - int node; - - reserve_memory(); - efi_memmap_walk(filter_memory, register_active_ranges); - - if (num_online_nodes() == 0) { - printk(KERN_ERR "node info missing!\n"); - node_set_online(0); - } - - nodes_or(memory_less_mask, memory_less_mask, node_online_map); - min_low_pfn = -1; - max_low_pfn = 0; - - /* These actually end up getting called by call_pernode_memory() */ - efi_memmap_walk(filter_rsvd_memory, build_node_maps); - efi_memmap_walk(filter_rsvd_memory, find_pernode_space); - efi_memmap_walk(find_max_min_low_pfn, NULL); - - for_each_online_node(node) - if (mem_data[node].min_pfn) - node_clear(node, memory_less_mask); - - reserve_pernode_space(); - memory_less_nodes(); - initialize_pernode_data(); - - max_pfn = max_low_pfn; - - find_initrd(); -} - -#ifdef CONFIG_SMP -/** - * per_cpu_init - setup per-cpu variables - * - * find_pernode_space() does most of this already, we just need to set - * local_per_cpu_offset - */ -void *per_cpu_init(void) -{ - int cpu; - static int first_time = 1; - - if (first_time) { - first_time = 0; - for_each_possible_early_cpu(cpu) - per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; - } - - return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; -} -#endif /* CONFIG_SMP */ - -/** - * call_pernode_memory - use SRAT to call callback functions with node info - * @start: physical start of range - * @len: length of range - * @arg: function to call for each range - * - * efi_memmap_walk() knows nothing about layout of memory across nodes. Find - * out to which node a block of memory belongs. Ignore memory that we cannot - * identify, and split blocks that run across multiple nodes. - * - * Take this opportunity to round the start address up and the end address - * down to page boundaries. 
- */ -void call_pernode_memory(unsigned long start, unsigned long len, void *arg) -{ - unsigned long rs, re, end = start + len; - void (*func)(unsigned long, unsigned long, int); - int i; - - start = PAGE_ALIGN(start); - end &= PAGE_MASK; - if (start >= end) - return; - - func = arg; - - if (!num_node_memblks) { - /* No SRAT table, so assume one node (node 0) */ - if (start < end) - (*func)(start, end - start, 0); - return; - } - - for (i = 0; i < num_node_memblks; i++) { - rs = max(start, node_memblk[i].start_paddr); - re = min(end, node_memblk[i].start_paddr + - node_memblk[i].size); - - if (rs < re) - (*func)(rs, re - rs, node_memblk[i].nid); - - if (re == end) - break; - } -} - -/** - * paging_init - setup page tables - * - * paging_init() sets up the page tables for each node of the system and frees - * the bootmem allocator memory for general use. - */ -void __init paging_init(void) -{ - unsigned long max_dma; - unsigned long max_zone_pfns[MAX_NR_ZONES]; - - max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - sparse_init(); - - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA32] = max_dma; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - free_area_init(max_zone_pfns); - - zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); -} - -pg_data_t * __init arch_alloc_nodedata(int nid) -{ - unsigned long size = compute_pernodesize(nid); - - return memblock_alloc(size, SMP_CACHE_BYTES); -} - -void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) -{ - pgdat_list[update_node] = update_pgdat; - scatter_node_data(); -} - -#ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, - struct vmem_altmap *altmap) -{ - return vmemmap_populate_basepages(start, end, node, NULL); -} - -void vmemmap_free(unsigned long start, unsigned long end, - struct vmem_altmap *altmap) -{ -} -#endif diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c deleted file mode 
100644 index da477c11770b..000000000000 --- a/arch/ia64/mm/extable.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Kernel exception handling table support. Derived from arch/alpha/mm/extable.c. - * - * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - */ - -#include <asm/ptrace.h> -#include <asm/extable.h> -#include <asm/errno.h> -#include <asm/processor.h> - -void -ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e) -{ - long fix = (u64) &e->fixup + e->fixup; - - regs->r8 = -EFAULT; - if (fix & 4) - regs->r9 = 0; - regs->cr_iip = fix & ~0xf; - ia64_psr(regs)->ri = fix & 0x3; /* set continuation slot number */ -} diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c deleted file mode 100644 index 5458b52b4009..000000000000 --- a/arch/ia64/mm/fault.c +++ /dev/null @@ -1,251 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * MMU fault handling support. - * - * Copyright (C) 1998-2002 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - */ -#include <linux/sched/signal.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/extable.h> -#include <linux/interrupt.h> -#include <linux/kprobes.h> -#include <linux/kdebug.h> -#include <linux/prefetch.h> -#include <linux/uaccess.h> -#include <linux/perf_event.h> - -#include <asm/processor.h> -#include <asm/exception.h> - -extern int die(char *, struct pt_regs *, long); - -/* - * Return TRUE if ADDRESS points at a page in the kernel's mapped segment - * (inside region 5, on ia64) and that page is present. 
- */ -static int -mapped_kernel_page_is_present (unsigned long address) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep, pte; - - pgd = pgd_offset_k(address); - if (pgd_none(*pgd) || pgd_bad(*pgd)) - return 0; - - p4d = p4d_offset(pgd, address); - if (p4d_none(*p4d) || p4d_bad(*p4d)) - return 0; - - pud = pud_offset(p4d, address); - if (pud_none(*pud) || pud_bad(*pud)) - return 0; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd) || pmd_bad(*pmd)) - return 0; - - ptep = pte_offset_kernel(pmd, address); - if (!ptep) - return 0; - - pte = *ptep; - return pte_present(pte); -} - -# define VM_READ_BIT 0 -# define VM_WRITE_BIT 1 -# define VM_EXEC_BIT 2 - -void __kprobes -ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) -{ - int signal = SIGSEGV, code = SEGV_MAPERR; - struct vm_area_struct *vma, *prev_vma; - struct mm_struct *mm = current->mm; - unsigned long mask; - vm_fault_t fault; - unsigned int flags = FAULT_FLAG_DEFAULT; - - mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) - | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); - - /* mmap_lock is performance critical.... */ - prefetchw(&mm->mmap_lock); - - /* - * If we're in an interrupt or have no user context, we must not take the fault.. 
- */ - if (faulthandler_disabled() || !mm) - goto no_context; - - /* - * This is to handle the kprobes on user space access instructions - */ - if (kprobe_page_fault(regs, TRAP_BRKPT)) - return; - - if (user_mode(regs)) - flags |= FAULT_FLAG_USER; - if (mask & VM_WRITE) - flags |= FAULT_FLAG_WRITE; - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); -retry: - mmap_read_lock(mm); - - vma = find_vma_prev(mm, address, &prev_vma); - if (!vma && !prev_vma ) - goto bad_area; - - /* - * find_vma_prev() returns vma such that address < vma->vm_end or NULL - * - * May find no vma, but could be that the last vm area is the - * register backing store that needs to expand upwards, in - * this case vma will be null, but prev_vma will ne non-null - */ - if (( !vma && prev_vma ) || (address < vma->vm_start) ) { - vma = expand_stack(mm, address); - if (!vma) - goto bad_area_nosemaphore; - } - - code = SEGV_ACCERR; - - /* OK, we've got a good vm_area for this memory area. Check the access permissions: */ - -# if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \ - || (1 << VM_EXEC_BIT) != VM_EXEC) -# error File is out of sync with <linux/mm.h>. Please update. -# endif - - if (((isr >> IA64_ISR_R_BIT) & 1UL) && (!(vma->vm_flags & (VM_READ | VM_WRITE)))) - goto bad_area; - - if ((vma->vm_flags & mask) != mask) - goto bad_area; - - /* - * If for any reason at all we couldn't handle the fault, make - * sure we exit gracefully rather than endlessly redo the - * fault. - */ - fault = handle_mm_fault(vma, address, flags, regs); - - if (fault_signal_pending(fault, regs)) { - if (!user_mode(regs)) - goto no_context; - return; - } - - /* The fault is fully completed (including releasing mmap lock) */ - if (fault & VM_FAULT_COMPLETED) - return; - - if (unlikely(fault & VM_FAULT_ERROR)) { - /* - * We ran out of memory, or some other thing happened - * to us that made us unable to handle the page fault - * gracefully. 
- */ - if (fault & VM_FAULT_OOM) { - goto out_of_memory; - } else if (fault & VM_FAULT_SIGSEGV) { - goto bad_area; - } else if (fault & VM_FAULT_SIGBUS) { - signal = SIGBUS; - goto bad_area; - } - BUG(); - } - - if (fault & VM_FAULT_RETRY) { - flags |= FAULT_FLAG_TRIED; - - /* No need to mmap_read_unlock(mm) as we would - * have already released it in __lock_page_or_retry - * in mm/filemap.c. - */ - - goto retry; - } - - mmap_read_unlock(mm); - return; - - bad_area: - mmap_read_unlock(mm); - bad_area_nosemaphore: - if ((isr & IA64_ISR_SP) - || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) - { - /* - * This fault was due to a speculative load or lfetch.fault, set the "ed" - * bit in the psr to ensure forward progress. (Target register will get a - * NaT for ld.s, lfetch will be canceled.) - */ - ia64_psr(regs)->ed = 1; - return; - } - if (user_mode(regs)) { - force_sig_fault(signal, code, (void __user *) address, - 0, __ISR_VALID, isr); - return; - } - - no_context: - if ((isr & IA64_ISR_SP) - || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) - { - /* - * This fault was due to a speculative load or lfetch.fault, set the "ed" - * bit in the psr to ensure forward progress. (Target register will get a - * NaT for ld.s, lfetch will be canceled.) - */ - ia64_psr(regs)->ed = 1; - return; - } - - /* - * Since we have no vma's for region 5, we might get here even if the address is - * valid, due to the VHPT walker inserting a non present translation that becomes - * stale. If that happens, the non present fault handler already purged the stale - * translation, which fixed the problem. So, we check to see if the translation is - * valid, and return if it is. - */ - if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address)) - return; - - if (ia64_done_with_exception(regs)) - return; - - /* - * Oops. The kernel tried to access some bad page. We'll have to terminate things - * with extreme prejudice. 
- */ - bust_spinlocks(1); - - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address); - else - printk(KERN_ALERT "Unable to handle kernel paging request at " - "virtual address %016lx\n", address); - if (die("Oops", regs, isr)) - regs = NULL; - bust_spinlocks(0); - if (regs) - make_task_dead(SIGKILL); - return; - - out_of_memory: - mmap_read_unlock(mm); - if (!user_mode(regs)) - goto no_context; - pagefault_out_of_memory(); -} diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c deleted file mode 100644 index adc49f2d22e8..000000000000 --- a/arch/ia64/mm/hugetlbpage.c +++ /dev/null @@ -1,186 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * IA-64 Huge TLB Page Support for Kernel. - * - * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com> - * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com> - * - * Sep, 2003: add numa support - * Feb, 2004: dynamic hugetlb page size via boot parameter - */ - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/hugetlb.h> -#include <linux/pagemap.h> -#include <linux/module.h> -#include <linux/sysctl.h> -#include <linux/log2.h> -#include <asm/mman.h> -#include <asm/tlb.h> -#include <asm/tlbflush.h> - -unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT; -EXPORT_SYMBOL(hpage_shift); - -pte_t * -huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long sz) -{ - unsigned long taddr = htlbpage_to_page(addr); - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, taddr); - p4d = p4d_offset(pgd, taddr); - pud = pud_alloc(mm, p4d, taddr); - if (pud) { - pmd = pmd_alloc(mm, pud, taddr); - if (pmd) - pte = pte_alloc_huge(mm, pmd, taddr); - } - return pte; -} - -pte_t * -huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz) -{ - unsigned long taddr = htlbpage_to_page(addr); - pgd_t *pgd; - p4d_t 
*p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, taddr); - if (pgd_present(*pgd)) { - p4d = p4d_offset(pgd, taddr); - if (p4d_present(*p4d)) { - pud = pud_offset(p4d, taddr); - if (pud_present(*pud)) { - pmd = pmd_offset(pud, taddr); - if (pmd_present(*pmd)) - pte = pte_offset_huge(pmd, taddr); - } - } - } - - return pte; -} - -#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } - -/* - * Don't actually need to do any preparation, but need to make sure - * the address is in the right region. - */ -int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len) -{ - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - if (REGION_NUMBER(addr) != RGN_HPAGE) - return -EINVAL; - - return 0; -} - -int pmd_huge(pmd_t pmd) -{ - return 0; -} - -int pud_huge(pud_t pud) -{ - return 0; -} - -void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) -{ - /* - * This is called to free hugetlb page tables. - * - * The offset of these addresses from the base of the hugetlb - * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that - * the standard free_pgd_range will free the right page tables. - * - * If floor and ceiling are also in the hugetlb region, they - * must likewise be scaled down; but if outside, left unchanged. 
- */ - - addr = htlbpage_to_page(addr); - end = htlbpage_to_page(end); - if (REGION_NUMBER(floor) == RGN_HPAGE) - floor = htlbpage_to_page(floor); - if (REGION_NUMBER(ceiling) == RGN_HPAGE) - ceiling = htlbpage_to_page(ceiling); - - free_pgd_range(tlb, addr, end, floor, ceiling); -} - -unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct vm_unmapped_area_info info; - - if (len > RGN_MAP_LIMIT) - return -ENOMEM; - if (len & ~HPAGE_MASK) - return -EINVAL; - - /* Handle MAP_FIXED */ - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(file, addr, len)) - return -EINVAL; - return addr; - } - - /* This code assumes that RGN_HPAGE != 0. */ - if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1))) - addr = HPAGE_REGION_BASE; - - info.flags = 0; - info.length = len; - info.low_limit = addr; - info.high_limit = HPAGE_REGION_BASE + RGN_MAP_LIMIT; - info.align_mask = PAGE_MASK & (HPAGE_SIZE - 1); - info.align_offset = 0; - return vm_unmapped_area(&info); -} - -static int __init hugetlb_setup_sz(char *str) -{ - u64 tr_pages; - unsigned long long size; - - if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0) - /* - * shouldn't happen, but just in case. - */ - tr_pages = 0x15557000UL; - - size = memparse(str, &str); - if (*str || !is_power_of_2(size) || !(tr_pages & size) || - size <= PAGE_SIZE || - size > (1UL << PAGE_SHIFT << MAX_ORDER)) { - printk(KERN_WARNING "Invalid huge page size specified\n"); - return 1; - } - - hpage_shift = __ffs(size); - /* - * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT - * override here with new page shift. 
- */ - ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2); - return 0; -} -early_param("hugepagesz", hugetlb_setup_sz); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c deleted file mode 100644 index 05b0f2f0c073..000000000000 --- a/arch/ia64/mm/init.c +++ /dev/null @@ -1,532 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Initialize MMU support. - * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - */ -#include <linux/kernel.h> -#include <linux/init.h> - -#include <linux/dma-map-ops.h> -#include <linux/dmar.h> -#include <linux/efi.h> -#include <linux/elf.h> -#include <linux/memblock.h> -#include <linux/mm.h> -#include <linux/sched/signal.h> -#include <linux/mmzone.h> -#include <linux/module.h> -#include <linux/personality.h> -#include <linux/reboot.h> -#include <linux/slab.h> -#include <linux/swap.h> -#include <linux/proc_fs.h> -#include <linux/bitops.h> -#include <linux/kexec.h> -#include <linux/swiotlb.h> - -#include <asm/dma.h> -#include <asm/efi.h> -#include <asm/io.h> -#include <asm/numa.h> -#include <asm/patch.h> -#include <asm/pgalloc.h> -#include <asm/sal.h> -#include <asm/sections.h> -#include <asm/tlb.h> -#include <linux/uaccess.h> -#include <asm/unistd.h> -#include <asm/mca.h> - -extern void ia64_tlb_init (void); - -unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; - -struct page *zero_page_memmap_ptr; /* map entry for zero page */ -EXPORT_SYMBOL(zero_page_memmap_ptr); - -void -__ia64_sync_icache_dcache (pte_t pte) -{ - unsigned long addr; - struct folio *folio; - - folio = page_folio(pte_page(pte)); - addr = (unsigned long)folio_address(folio); - - if (test_bit(PG_arch_1, &folio->flags)) - return; /* i-cache is already coherent with d-cache */ - - flush_icache_range(addr, addr + folio_size(folio)); - set_bit(PG_arch_1, &folio->flags); /* mark page as clean */ -} - -/* - * Since DMA is i-cache coherent, any (complete) folios that were written via - * DMA can be marked as "clean" so that 
lazy_mmu_prot_update() doesn't have to - * flush them when they get mapped into an executable vm-area. - */ -void arch_dma_mark_clean(phys_addr_t paddr, size_t size) -{ - unsigned long pfn = PHYS_PFN(paddr); - struct folio *folio = page_folio(pfn_to_page(pfn)); - ssize_t left = size; - size_t offset = offset_in_folio(folio, paddr); - - if (offset) { - left -= folio_size(folio) - offset; - if (left <= 0) - return; - folio = folio_next(folio); - } - - while (left >= (ssize_t)folio_size(folio)) { - left -= folio_size(folio); - set_bit(PG_arch_1, &pfn_to_page(pfn)->flags); - if (!left) - break; - folio = folio_next(folio); - } -} - -inline void -ia64_set_rbs_bot (void) -{ - unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16; - - if (stack_size > MAX_USER_STACK_SIZE) - stack_size = MAX_USER_STACK_SIZE; - current->thread.rbs_bot = PAGE_ALIGN(current->mm->start_stack - stack_size); -} - -/* - * This performs some platform-dependent address space initialization. - * On IA-64, we want to setup the VM area for the register backing - * store (which grows upwards) and install the gateway page which is - * used for signal trampolines, etc. - */ -void -ia64_init_addr_space (void) -{ - struct vm_area_struct *vma; - - ia64_set_rbs_bot(); - - /* - * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore - * the problem. When the process attempts to write to the register backing store - * for the first time, it will get a SEGFAULT in this case. 
- */ - vma = vm_area_alloc(current->mm); - if (vma) { - vma_set_anonymous(vma); - vma->vm_start = current->thread.rbs_bot & PAGE_MASK; - vma->vm_end = vma->vm_start + PAGE_SIZE; - vm_flags_init(vma, VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT); - vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - mmap_write_lock(current->mm); - if (insert_vm_struct(current->mm, vma)) { - mmap_write_unlock(current->mm); - vm_area_free(vma); - return; - } - mmap_write_unlock(current->mm); - } - - /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ - if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = vm_area_alloc(current->mm); - if (vma) { - vma_set_anonymous(vma); - vma->vm_end = PAGE_SIZE; - vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); - vm_flags_init(vma, VM_READ | VM_MAYREAD | VM_IO | - VM_DONTEXPAND | VM_DONTDUMP); - mmap_write_lock(current->mm); - if (insert_vm_struct(current->mm, vma)) { - mmap_write_unlock(current->mm); - vm_area_free(vma); - return; - } - mmap_write_unlock(current->mm); - } - } -} - -void -free_initmem (void) -{ - free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end), - -1, "unused kernel"); -} - -void __init -free_initrd_mem (unsigned long start, unsigned long end) -{ - /* - * EFI uses 4KB pages while the kernel can use 4KB or bigger. - * Thus EFI and the kernel may have different page sizes. It is - * therefore possible to have the initrd share the same page as - * the end of the kernel (given current setup). 
- * - * To avoid freeing/using the wrong page (kernel sized) we: - * - align up the beginning of initrd - * - align down the end of initrd - * - * | | - * |=============| a000 - * | | - * | | - * | | 9000 - * |/////////////| - * |/////////////| - * |=============| 8000 - * |///INITRD////| - * |/////////////| - * |/////////////| 7000 - * | | - * |KKKKKKKKKKKKK| - * |=============| 6000 - * |KKKKKKKKKKKKK| - * |KKKKKKKKKKKKK| - * K=kernel using 8KB pages - * - * In this example, we must free page 8000 ONLY. So we must align up - * initrd_start and keep initrd_end as is. - */ - start = PAGE_ALIGN(start); - end = end & PAGE_MASK; - - if (start < end) - printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - if (!virt_addr_valid(start)) - continue; - free_reserved_page(virt_to_page(start)); - } -} - -/* - * This installs a clean page in the kernel's page table. - */ -static struct page * __init -put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ - - { - p4d = p4d_alloc(&init_mm, pgd, address); - if (!p4d) - goto out; - pud = pud_alloc(&init_mm, p4d, address); - if (!pud) - goto out; - pmd = pmd_alloc(&init_mm, pud, address); - if (!pmd) - goto out; - pte = pte_alloc_kernel(pmd, address); - if (!pte) - goto out; - if (!pte_none(*pte)) - goto out; - set_pte(pte, mk_pte(page, pgprot)); - } - out: - /* no need for flush_tlb */ - return page; -} - -static void __init -setup_gate (void) -{ - struct page *page; - - /* - * Map the gate page twice: once read-only to export the ELF - * headers etc. 
and once execute-only page to enable - * privilege-promotion via "epc": - */ - page = virt_to_page(ia64_imva(__start_gate_section)); - put_kernel_page(page, GATE_ADDR, PAGE_READONLY); -#ifdef HAVE_BUGGY_SEGREL - page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE)); - put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE); -#else - put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE); - /* Fill in the holes (if any) with read-only zero pages: */ - { - unsigned long addr; - - for (addr = GATE_ADDR + PAGE_SIZE; - addr < GATE_ADDR + PERCPU_PAGE_SIZE; - addr += PAGE_SIZE) - { - put_kernel_page(ZERO_PAGE(0), addr, - PAGE_READONLY); - put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE, - PAGE_READONLY); - } - } -#endif - ia64_patch_gate(); -} - -static struct vm_area_struct gate_vma; - -static int __init gate_vma_init(void) -{ - vma_init(&gate_vma, NULL); - gate_vma.vm_start = FIXADDR_USER_START; - gate_vma.vm_end = FIXADDR_USER_END; - vm_flags_init(&gate_vma, VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC); - gate_vma.vm_page_prot = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX); - - return 0; -} -__initcall(gate_vma_init); - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return &gate_vma; -} - -int in_gate_area_no_mm(unsigned long addr) -{ - if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) - return 1; - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return in_gate_area_no_mm(addr); -} - -void ia64_mmu_init(void *my_cpu_data) -{ - unsigned long pta, impl_va_bits; - extern void tlb_init(void); - -#ifdef CONFIG_DISABLE_VHPT -# define VHPT_ENABLE_BIT 0 -#else -# define VHPT_ENABLE_BIT 1 -#endif - - /* - * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped - * address space. 
The IA-64 architecture guarantees that at least 50 bits of - * virtual address space are implemented but if we pick a large enough page size - * (e.g., 64KB), the mapped address space is big enough that it will overlap with - * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages, - * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a - * problem in practice. Alternatively, we could truncate the top of the mapped - * address space to not permit mappings that would overlap with the VMLPT. - * --davidm 00/12/06 - */ -# define pte_bits 3 -# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT) - /* - * The virtual page table has to cover the entire implemented address space within - * a region even though not all of this space may be mappable. The reason for - * this is that the Access bit and Dirty bit fault handlers perform - * non-speculative accesses to the virtual page table, so the address range of the - * virtual page table itself needs to be covered by virtual page table. - */ -# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) -# define POW2(n) (1ULL << (n)) - - impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61))); - - if (impl_va_bits < 51 || impl_va_bits > 61) - panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1); - /* - * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need, - * which must fit into "vmlpt_bits - pte_bits" slots. Second half of - * the test makes sure that our mapped space doesn't overlap the - * unimplemented hole in the middle of the region. 
- */ - if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) || - (mapped_space_bits > impl_va_bits - 1)) - panic("Cannot build a big enough virtual-linear page table" - " to cover mapped address space.\n" - " Try using a smaller page size.\n"); - - - /* place the VMLPT at the end of each page-table mapped region: */ - pta = POW2(61) - POW2(vmlpt_bits); - - /* - * Set the (virtually mapped linear) page table address. Bit - * 8 selects between the short and long format, bits 2-7 the - * size of the table, and bit 0 whether the VHPT walker is - * enabled. - */ - ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT); - - ia64_tlb_init(); - -#ifdef CONFIG_HUGETLB_PAGE - ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2); - ia64_srlz_d(); -#endif -} - -int __init register_active_ranges(u64 start, u64 len, int nid) -{ - u64 end = start + len; - -#ifdef CONFIG_KEXEC - if (start > crashk_res.start && start < crashk_res.end) - start = crashk_res.end; - if (end > crashk_res.start && end < crashk_res.end) - end = crashk_res.start; -#endif - - if (start < end) - memblock_add_node(__pa(start), end - start, nid, MEMBLOCK_NONE); - return 0; -} - -int -find_max_min_low_pfn (u64 start, u64 end, void *arg) -{ - unsigned long pfn_start, pfn_end; -#ifdef CONFIG_FLATMEM - pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT; - pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT; -#else - pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT; - pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT; -#endif - min_low_pfn = min(min_low_pfn, pfn_start); - max_low_pfn = max(max_low_pfn, pfn_end); - return 0; -} - -/* - * Boot command-line option "nolwsys" can be used to disable the use of any light-weight - * system call handler. When this option is in effect, all fsyscalls will end up bubbling - * down into the kernel and calling the normal (heavy-weight) syscall handler. 
This is - * useful for performance testing, but conceivably could also come in handy for debugging - * purposes. - */ - -static int nolwsys __initdata; - -static int __init -nolwsys_setup (char *s) -{ - nolwsys = 1; - return 1; -} - -__setup("nolwsys", nolwsys_setup); - -void __init -mem_init (void) -{ - int i; - - BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE); - BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE); - BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE); - - /* - * This needs to be called _after_ the command line has been parsed but - * _before_ any drivers that may need the PCI DMA interface are - * initialized or bootmem has been freed. - */ - do { -#ifdef CONFIG_INTEL_IOMMU - detect_intel_iommu(); - if (iommu_detected) - break; -#endif - swiotlb_init(true, SWIOTLB_VERBOSE); - } while (0); - -#ifdef CONFIG_FLATMEM - BUG_ON(!mem_map); -#endif - - set_max_mapnr(max_low_pfn); - high_memory = __va(max_low_pfn * PAGE_SIZE); - memblock_free_all(); - - /* - * For fsyscall entrypoints with no light-weight handler, use the ordinary - * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry - * code can tell them apart. 
- */ - for (i = 0; i < NR_syscalls; ++i) { - extern unsigned long fsyscall_table[NR_syscalls]; - extern unsigned long sys_call_table[NR_syscalls]; - - if (!fsyscall_table[i] || nolwsys) - fsyscall_table[i] = sys_call_table[i] | 1; - } - setup_gate(); -} - -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_params *params) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - int ret; - - if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) - return -EINVAL; - - ret = __add_pages(nid, start_pfn, nr_pages, params); - if (ret) - printk("%s: Problem encountered in __add_pages() as ret=%d\n", - __func__, ret); - - return ret; -} - -void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - - __remove_pages(start_pfn, nr_pages, altmap); -} -#endif - -static const pgprot_t protection_map[16] = { - [VM_NONE] = PAGE_NONE, - [VM_READ] = PAGE_READONLY, - [VM_WRITE] = PAGE_READONLY, - [VM_WRITE | VM_READ] = PAGE_READONLY, - [VM_EXEC] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_X_RX), - [VM_EXEC | VM_READ] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_RX), - [VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC, - [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_EXEC, - [VM_SHARED] = PAGE_NONE, - [VM_SHARED | VM_READ] = PAGE_READONLY, - [VM_SHARED | VM_WRITE] = PAGE_SHARED, - [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, - [VM_SHARED | VM_EXEC] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_X_RX), - [VM_SHARED | VM_EXEC | VM_READ] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_RX), - [VM_SHARED | VM_EXEC | VM_WRITE] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_RWX), - [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_RWX) -}; -DECLARE_VM_GET_PAGE_PROT diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c deleted 
file mode 100644 index 711b6abc822e..000000000000 --- a/arch/ia64/mm/ioremap.c +++ /dev/null @@ -1,94 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P. - * Bjorn Helgaas <bjorn.helgaas@hp.com> - */ - -#include <linux/compiler.h> -#include <linux/module.h> -#include <linux/efi.h> -#include <linux/io.h> -#include <linux/mm.h> -#include <linux/vmalloc.h> -#include <asm/io.h> -#include <asm/meminit.h> - -static inline void __iomem * -__ioremap_uc(unsigned long phys_addr) -{ - return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr); -} - -void __iomem * -early_ioremap (unsigned long phys_addr, unsigned long size) -{ - u64 attr; - attr = kern_mem_attribute(phys_addr, size); - if (attr & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(phys_addr); - return __ioremap_uc(phys_addr); -} - -void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long flags) -{ - u64 attr; - unsigned long gran_base, gran_size; - unsigned long page_base; - - /* - * For things in kern_memmap, we must use the same attribute - * as the rest of the kernel. For more details, see - * Documentation/arch/ia64/aliasing.rst. - */ - attr = kern_mem_attribute(phys_addr, size); - if (attr & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(phys_addr); - else if (attr & EFI_MEMORY_UC) - return __ioremap_uc(phys_addr); - - /* - * Some chipsets don't support UC access to memory. If - * WB is supported for the whole granule, we prefer that. - */ - gran_base = GRANULEROUNDDOWN(phys_addr); - gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base; - if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(phys_addr); - - /* - * WB is not supported for the whole granule, so we can't use - * the region 7 identity mapping. If we can safely cover the - * area with kernel page table mappings, we can use those - * instead. 
- */ - page_base = phys_addr & PAGE_MASK; - size = PAGE_ALIGN(phys_addr + size) - page_base; - if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) - return generic_ioremap_prot(phys_addr, size, __pgprot(flags)); - - return __ioremap_uc(phys_addr); -} -EXPORT_SYMBOL(ioremap_prot); - -void __iomem * -ioremap_uc(unsigned long phys_addr, unsigned long size) -{ - if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) - return NULL; - - return __ioremap_uc(phys_addr); -} -EXPORT_SYMBOL(ioremap_uc); - -void -early_iounmap (volatile void __iomem *addr, unsigned long size) -{ -} - -void iounmap(volatile void __iomem *addr) -{ - if (REGION_NUMBER(addr) == RGN_GATE) - vunmap((void *) ((unsigned long) addr & PAGE_MASK)); -} -EXPORT_SYMBOL(iounmap); diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c deleted file mode 100644 index 4c7b1f50e3b7..000000000000 --- a/arch/ia64/mm/numa.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * This file contains NUMA specific variables and functions which are used on - * NUMA machines with contiguous memory. - * - * 2002/08/07 Erich Focht <efocht@ess.nec.de> - */ - -#include <linux/cpu.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/node.h> -#include <linux/init.h> -#include <linux/memblock.h> -#include <linux/module.h> -#include <asm/mmzone.h> -#include <asm/numa.h> - - -/* - * The following structures are usually initialized by ACPI or - * similar mechanisms and describe the NUMA characteristics of the machine. - */ -int num_node_memblks; -struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; -struct node_cpuid_s node_cpuid[NR_CPUS] = - { [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } }; - -/* - * This is a matrix with "distances" between nodes, they should be - * proportional to the memory access latency ratios. 
- */ -u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; - -int __node_distance(int from, int to) -{ - return slit_distance(from, to); -} -EXPORT_SYMBOL(__node_distance); - -/* Identify which cnode a physical address resides on */ -int -paddr_to_nid(unsigned long paddr) -{ - int i; - - for (i = 0; i < num_node_memblks; i++) - if (paddr >= node_memblk[i].start_paddr && - paddr < node_memblk[i].start_paddr + node_memblk[i].size) - break; - - return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0); -} -EXPORT_SYMBOL(paddr_to_nid); - -#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA) -void numa_clear_node(int cpu) -{ - unmap_cpu_from_node(cpu, NUMA_NO_NODE); -} - -#ifdef CONFIG_MEMORY_HOTPLUG -/* - * SRAT information is stored in node_memblk[], then we can use SRAT - * information at memory-hot-add if necessary. - */ - -int memory_add_physaddr_to_nid(u64 addr) -{ - int nid = paddr_to_nid(addr); - if (nid < 0) - return 0; - return nid; -} -EXPORT_SYMBOL(memory_add_physaddr_to_nid); -#endif -#endif diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c deleted file mode 100644 index ca060e7a2a46..000000000000 --- a/arch/ia64/mm/tlb.c +++ /dev/null @@ -1,591 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * TLB support routines. - * - * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * - * 08/02/00 A. Mallick <asit.k.mallick@intel.com> - * Modified RID allocation for SMP - * Goutham Rao <goutham.rao@intel.com> - * IPI based ptc implementation and A-step IPI implementation. - * Rohit Seth <rohit.seth@intel.com> - * Ken Chen <kenneth.w.chen@intel.com> - * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation - * Copyright (C) 2007 Intel Corp - * Fenghua Yu <fenghua.yu@intel.com> - * Add multiple ptc.g/ptc.ga instruction support in global tlb purge. 
- */ -#include <linux/module.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/smp.h> -#include <linux/mm.h> -#include <linux/memblock.h> -#include <linux/slab.h> - -#include <asm/delay.h> -#include <asm/mmu_context.h> -#include <asm/pal.h> -#include <asm/tlbflush.h> -#include <asm/dma.h> -#include <asm/processor.h> -#include <asm/sal.h> -#include <asm/tlb.h> - -static struct { - u64 mask; /* mask of supported purge page-sizes */ - unsigned long max_bits; /* log2 of largest supported purge page-size */ -} purge; - -struct ia64_ctx ia64_ctx = { - .lock = __SPIN_LOCK_UNLOCKED(ia64_ctx.lock), - .next = 1, - .max_ctx = ~0U -}; - -DEFINE_PER_CPU(u8, ia64_need_tlb_flush); -DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/ -DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ - -struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; - -/* - * Initializes the ia64_ctx.bitmap array based on max_ctx+1. - * Called after cpu_init() has setup ia64_ctx.max_ctx based on - * maximum RID that is supported by boot CPU. - */ -void __init -mmu_context_init (void) -{ - ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, - SMP_CACHE_BYTES); - if (!ia64_ctx.bitmap) - panic("%s: Failed to allocate %u bytes\n", __func__, - (ia64_ctx.max_ctx + 1) >> 3); - ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, - SMP_CACHE_BYTES); - if (!ia64_ctx.flushmap) - panic("%s: Failed to allocate %u bytes\n", __func__, - (ia64_ctx.max_ctx + 1) >> 3); -} - -/* - * Acquire the ia64_ctx.lock before calling this function! 
- */ -void -wrap_mmu_context (struct mm_struct *mm) -{ - int i, cpu; - unsigned long flush_bit; - - for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { - flush_bit = xchg(&ia64_ctx.flushmap[i], 0); - ia64_ctx.bitmap[i] ^= flush_bit; - } - - /* use offset at 300 to skip daemons */ - ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, - ia64_ctx.max_ctx, 300); - ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, - ia64_ctx.max_ctx, ia64_ctx.next); - - /* - * can't call flush_tlb_all() here because of race condition - * with O(1) scheduler [EF] - */ - cpu = get_cpu(); /* prevent preemption/migration */ - for_each_online_cpu(i) - if (i != cpu) - per_cpu(ia64_need_tlb_flush, i) = 1; - put_cpu(); - local_flush_tlb_all(); -} - -/* - * Implement "spinaphores" ... like counting semaphores, but they - * spin instead of sleeping. If there are ever any other users for - * this primitive it can be moved up to a spinaphore.h header. - */ -struct spinaphore { - unsigned long ticket; - unsigned long serve; -}; - -static inline void spinaphore_init(struct spinaphore *ss, int val) -{ - ss->ticket = 0; - ss->serve = val; -} - -static inline void down_spin(struct spinaphore *ss) -{ - unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve; - - if (time_before(t, ss->serve)) - return; - - ia64_invala(); - - for (;;) { - asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory"); - if (time_before(t, serve)) - return; - cpu_relax(); - } -} - -static inline void up_spin(struct spinaphore *ss) -{ - ia64_fetchadd(1, &ss->serve, rel); -} - -static struct spinaphore ptcg_sem; -static u16 nptcg = 1; -static int need_ptcg_sem = 1; -static int toolatetochangeptcgsem = 0; - -/* - * Kernel parameter "nptcg=" overrides max number of concurrent global TLB - * purges which is reported from either PAL or SAL PALO. - * - * We don't have sanity checking for nptcg value. It's the user's responsibility - * for valid nptcg value on the platform. 
Otherwise, kernel may hang in some - * cases. - */ -static int __init -set_nptcg(char *str) -{ - int value = 0; - - get_option(&str, &value); - setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER); - - return 1; -} - -__setup("nptcg=", set_nptcg); - -/* - * Maximum number of simultaneous ptc.g purges in the system can - * be defined by PAL_VM_SUMMARY (in which case we should take - * the smallest value for any cpu in the system) or by the PAL - * override table (in which case we should ignore the value from - * PAL_VM_SUMMARY). - * - * Kernel parameter "nptcg=" overrides maximum number of simultaneous ptc.g - * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case, - * we should ignore the value from either PAL_VM_SUMMARY or PAL override table. - * - * Complicating the logic here is the fact that num_possible_cpus() - * isn't fully setup until we start bringing cpus online. - */ -void -setup_ptcg_sem(int max_purges, int nptcg_from) -{ - static int kp_override; - static int palo_override; - static int firstcpu = 1; - - if (toolatetochangeptcgsem) { - if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0) - BUG_ON(1 < nptcg); - else - BUG_ON(max_purges < nptcg); - return; - } - - if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) { - kp_override = 1; - nptcg = max_purges; - goto resetsema; - } - if (kp_override) { - need_ptcg_sem = num_possible_cpus() > nptcg; - return; - } - - if (nptcg_from == NPTCG_FROM_PALO) { - palo_override = 1; - - /* In PALO max_purges == 0 really means it! */ - if (max_purges == 0) - panic("Whoa! 
Platform does not support global TLB purges.\n"); - nptcg = max_purges; - if (nptcg == PALO_MAX_TLB_PURGES) { - need_ptcg_sem = 0; - return; - } - goto resetsema; - } - if (palo_override) { - if (nptcg != PALO_MAX_TLB_PURGES) - need_ptcg_sem = (num_possible_cpus() > nptcg); - return; - } - - /* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */ - if (max_purges == 0) max_purges = 1; - - if (firstcpu) { - nptcg = max_purges; - firstcpu = 0; - } - if (max_purges < nptcg) - nptcg = max_purges; - if (nptcg == PAL_MAX_PURGES) { - need_ptcg_sem = 0; - return; - } else - need_ptcg_sem = (num_possible_cpus() > nptcg); - -resetsema: - spinaphore_init(&ptcg_sem, max_purges); -} - -#ifdef CONFIG_SMP -static void -ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, - unsigned long end, unsigned long nbits) -{ - struct mm_struct *active_mm = current->active_mm; - - toolatetochangeptcgsem = 1; - - if (mm != active_mm) { - /* Restore region IDs for mm */ - if (mm && active_mm) { - activate_context(mm); - } else { - flush_tlb_all(); - return; - } - } - - if (need_ptcg_sem) - down_spin(&ptcg_sem); - - do { - /* - * Flush ALAT entries also. 
- */ - ia64_ptcga(start, (nbits << 2)); - ia64_srlz_i(); - start += (1UL << nbits); - } while (start < end); - - if (need_ptcg_sem) - up_spin(&ptcg_sem); - - if (mm != active_mm) { - activate_context(active_mm); - } -} -#endif /* CONFIG_SMP */ - -void -local_flush_tlb_all (void) -{ - unsigned long i, j, flags, count0, count1, stride0, stride1, addr; - - addr = local_cpu_data->ptce_base; - count0 = local_cpu_data->ptce_count[0]; - count1 = local_cpu_data->ptce_count[1]; - stride0 = local_cpu_data->ptce_stride[0]; - stride1 = local_cpu_data->ptce_stride[1]; - - local_irq_save(flags); - for (i = 0; i < count0; ++i) { - for (j = 0; j < count1; ++j) { - ia64_ptce(addr); - addr += stride1; - } - addr += stride0; - } - local_irq_restore(flags); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -static void -__flush_tlb_range (struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long size = end - start; - unsigned long nbits; - -#ifndef CONFIG_SMP - if (mm != current->active_mm) { - mm->context = 0; - return; - } -#endif - - nbits = ia64_fls(size + 0xfff); - while (unlikely (((1UL << nbits) & purge.mask) == 0) && - (nbits < purge.max_bits)) - ++nbits; - if (nbits > purge.max_bits) - nbits = purge.max_bits; - start &= ~((1UL << nbits) - 1); - - preempt_disable(); -#ifdef CONFIG_SMP - if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) { - ia64_global_tlb_purge(mm, start, end, nbits); - preempt_enable(); - return; - } -#endif - do { - ia64_ptcl(start, (nbits<<2)); - start += (1UL << nbits); - } while (start < end); - preempt_enable(); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (unlikely(end - start >= 1024*1024*1024*1024UL - || REGION_NUMBER(start) != REGION_NUMBER(end - 1))) { - /* - * If we flush more than a tera-byte or across regions, we're - * probably better off just flushing the 
entire TLB(s). This - * should be very rare and is not worth optimizing for. - */ - flush_tlb_all(); - } else { - /* flush the address range from the tlb */ - __flush_tlb_range(vma, start, end); - /* flush the virt. page-table area mapping the addr range */ - __flush_tlb_range(vma, ia64_thash(start), ia64_thash(end)); - } -} -EXPORT_SYMBOL(flush_tlb_range); - -void ia64_tlb_init(void) -{ - ia64_ptce_info_t ptce_info; - u64 tr_pgbits; - long status; - pal_vm_info_1_u_t vm_info_1; - pal_vm_info_2_u_t vm_info_2; - int cpu = smp_processor_id(); - - if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) { - printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; " - "defaulting to architected purge page-sizes.\n", status); - purge.mask = 0x115557000UL; - } - purge.max_bits = ia64_fls(purge.mask); - - ia64_get_ptce(&ptce_info); - local_cpu_data->ptce_base = ptce_info.base; - local_cpu_data->ptce_count[0] = ptce_info.count[0]; - local_cpu_data->ptce_count[1] = ptce_info.count[1]; - local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; - local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; - - local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ - status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2); - - if (status) { - printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); - per_cpu(ia64_tr_num, cpu) = 8; - return; - } - per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; - if (per_cpu(ia64_tr_num, cpu) > - (vm_info_1.pal_vm_info_1_s.max_dtr_entry+1)) - per_cpu(ia64_tr_num, cpu) = - vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; - if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) { - static int justonce = 1; - per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX; - if (justonce) { - justonce = 0; - printk(KERN_DEBUG "TR register number exceeds " - "IA64_TR_ALLOC_MAX!\n"); - } - } -} - -/* - * is_tr_overlap - * - * Check overlap with inserted TRs. 
- */ -static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size) -{ - u64 tr_log_size; - u64 tr_end; - u64 va_rr = ia64_get_rr(va); - u64 va_rid = RR_TO_RID(va_rr); - u64 va_end = va + (1<<log_size) - 1; - - if (va_rid != RR_TO_RID(p->rr)) - return 0; - tr_log_size = (p->itir & 0xff) >> 2; - tr_end = p->ifa + (1<<tr_log_size) - 1; - - if (va > tr_end || p->ifa > va_end) - return 0; - return 1; - -} - -/* - * ia64_insert_tr in virtual mode. Allocate a TR slot - * - * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr - * - * va : virtual address. - * pte : pte entries inserted. - * log_size: range to be covered. - * - * Return value: <0 : error No. - * - * >=0 : slot number allocated for TR. - * Must be called with preemption disabled. - */ -int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) -{ - int i, r; - unsigned long psr; - struct ia64_tr_entry *p; - int cpu = smp_processor_id(); - - if (!ia64_idtrs[cpu]) { - ia64_idtrs[cpu] = kmalloc_array(2 * IA64_TR_ALLOC_MAX, - sizeof(struct ia64_tr_entry), - GFP_KERNEL); - if (!ia64_idtrs[cpu]) - return -ENOMEM; - } - r = -EINVAL; - /*Check overlap with existing TR entries*/ - if (target_mask & 0x1) { - p = ia64_idtrs[cpu]; - for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); - i++, p++) { - if (p->pte & 0x1) - if (is_tr_overlap(p, va, log_size)) { - printk(KERN_DEBUG "Overlapped Entry" - "Inserted for TR Register!!\n"); - goto out; - } - } - } - if (target_mask & 0x2) { - p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX; - for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); - i++, p++) { - if (p->pte & 0x1) - if (is_tr_overlap(p, va, log_size)) { - printk(KERN_DEBUG "Overlapped Entry" - "Inserted for TR Register!!\n"); - goto out; - } - } - } - - for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { - switch (target_mask & 0x3) { - case 1: - if (!((ia64_idtrs[cpu] + i)->pte & 0x1)) - goto found; - continue; - case 2: - if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 
0x1)) - goto found; - continue; - case 3: - if (!((ia64_idtrs[cpu] + i)->pte & 0x1) && - !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) - goto found; - continue; - default: - r = -EINVAL; - goto out; - } - } -found: - if (i >= per_cpu(ia64_tr_num, cpu)) - return -EBUSY; - - /*Record tr info for mca handler use!*/ - if (i > per_cpu(ia64_tr_used, cpu)) - per_cpu(ia64_tr_used, cpu) = i; - - psr = ia64_clear_ic(); - if (target_mask & 0x1) { - ia64_itr(0x1, i, va, pte, log_size); - ia64_srlz_i(); - p = ia64_idtrs[cpu] + i; - p->ifa = va; - p->pte = pte; - p->itir = log_size << 2; - p->rr = ia64_get_rr(va); - } - if (target_mask & 0x2) { - ia64_itr(0x2, i, va, pte, log_size); - ia64_srlz_i(); - p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i; - p->ifa = va; - p->pte = pte; - p->itir = log_size << 2; - p->rr = ia64_get_rr(va); - } - ia64_set_psr(psr); - r = i; -out: - return r; -} -EXPORT_SYMBOL_GPL(ia64_itr_entry); - -/* - * ia64_purge_tr - * - * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr. - * slot: slot number to be freed. - * - * Must be called with preemption disabled. - */ -void ia64_ptr_entry(u64 target_mask, int slot) -{ - int cpu = smp_processor_id(); - int i; - struct ia64_tr_entry *p; - - if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu)) - return; - - if (target_mask & 0x1) { - p = ia64_idtrs[cpu] + slot; - if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { - p->pte = 0; - ia64_ptr(0x1, p->ifa, p->itir>>2); - ia64_srlz_i(); - } - } - - if (target_mask & 0x2) { - p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot; - if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { - p->pte = 0; - ia64_ptr(0x2, p->ifa, p->itir>>2); - ia64_srlz_i(); - } - } - - for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { - if (((ia64_idtrs[cpu] + i)->pte & 0x1) || - ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) - break; - } - per_cpu(ia64_tr_used, cpu) = i; -} -EXPORT_SYMBOL_GPL(ia64_ptr_entry); |