summaryrefslogtreecommitdiff
path: root/arch/s390/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--arch/s390/mm/Makefile2
-rw-r--r--arch/s390/mm/extmem.c138
-rw-r--r--arch/s390/mm/fault.c28
-rw-r--r--arch/s390/mm/init.c184
-rw-r--r--arch/s390/mm/ioremap.c84
-rw-r--r--arch/s390/mm/vmem.c382
6 files changed, 499 insertions, 319 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index aa9a42b6e62d..8e09db1edbb9 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,6 +2,6 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
-obj-y := init.o fault.o ioremap.o extmem.o mmap.o
+obj-y := init.o fault.o ioremap.o extmem.o mmap.o vmem.o
obj-$(CONFIG_CMM) += cmm.o
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 226275d5c4f6..775bf19e742b 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -14,12 +14,14 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/bootmem.h>
+#include <linux/ctype.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/ebcdic.h>
#include <asm/errno.h>
#include <asm/extmem.h>
#include <asm/cpcmd.h>
-#include <linux/ctype.h>
+#include <asm/setup.h>
#define DCSS_DEBUG /* Debug messages on/off */
@@ -77,15 +79,11 @@ struct dcss_segment {
int segcnt;
};
-static DEFINE_SPINLOCK(dcss_lock);
+static DEFINE_MUTEX(dcss_lock);
static struct list_head dcss_list = LIST_HEAD_INIT(dcss_list);
static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC",
"EW/EN-MIXED" };
-extern struct {
- unsigned long addr, size, type;
-} memory_chunk[MEMORY_CHUNKS];
-
/*
* Create the 8 bytes, ebcdic VM segment name from
* an ascii name.
@@ -117,7 +115,7 @@ segment_by_name (char *name)
struct list_head *l;
struct dcss_segment *tmp, *retval = NULL;
- assert_spin_locked(&dcss_lock);
+ BUG_ON(!mutex_is_locked(&dcss_lock));
dcss_mkname (name, dcss_name);
list_for_each (l, &dcss_list) {
tmp = list_entry (l, struct dcss_segment, list);
@@ -241,65 +239,6 @@ query_segment_type (struct dcss_segment *seg)
}
/*
- * check if the given segment collides with guest storage.
- * returns 1 if this is the case, 0 if no collision was found
- */
-static int
-segment_overlaps_storage(struct dcss_segment *seg)
-{
- int i;
-
- for (i=0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
- if (memory_chunk[i].type != 0)
- continue;
- if ((memory_chunk[i].addr >> 20) > (seg->end >> 20))
- continue;
- if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20)
- < (seg->start_addr >> 20))
- continue;
- return 1;
- }
- return 0;
-}
-
-/*
- * check if segment collides with other segments that are currently loaded
- * returns 1 if this is the case, 0 if no collision was found
- */
-static int
-segment_overlaps_others (struct dcss_segment *seg)
-{
- struct list_head *l;
- struct dcss_segment *tmp;
-
- assert_spin_locked(&dcss_lock);
- list_for_each(l, &dcss_list) {
- tmp = list_entry(l, struct dcss_segment, list);
- if ((tmp->start_addr >> 20) > (seg->end >> 20))
- continue;
- if ((tmp->end >> 20) < (seg->start_addr >> 20))
- continue;
- if (seg == tmp)
- continue;
- return 1;
- }
- return 0;
-}
-
-/*
- * check if segment exceeds the kernel mapping range (detected or set via mem=)
- * returns 1 if this is the case, 0 if segment fits into the range
- */
-static inline int
-segment_exceeds_range (struct dcss_segment *seg)
-{
- int seg_last_pfn = (seg->end) >> PAGE_SHIFT;
- if (seg_last_pfn > max_pfn)
- return 1;
- return 0;
-}
-
-/*
* get info about a segment
* possible return values:
* -ENOSYS : we are not running on VM
@@ -344,24 +283,26 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
rc = query_segment_type (seg);
if (rc < 0)
goto out_free;
- if (segment_exceeds_range(seg)) {
- PRINT_WARN ("segment_load: not loading segment %s - exceeds"
- " kernel mapping range\n",name);
- rc = -ERANGE;
+
+ rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+
+ switch (rc) {
+ case 0:
+ break;
+ case -ENOSPC:
+ PRINT_WARN("segment_load: not loading segment %s - overlaps "
+ "storage/segment\n", name);
goto out_free;
- }
- if (segment_overlaps_storage(seg)) {
- PRINT_WARN ("segment_load: not loading segment %s - overlaps"
- " storage\n",name);
- rc = -ENOSPC;
+ case -ERANGE:
+ PRINT_WARN("segment_load: not loading segment %s - exceeds "
+ "kernel mapping range\n", name);
goto out_free;
- }
- if (segment_overlaps_others(seg)) {
- PRINT_WARN ("segment_load: not loading segment %s - overlaps"
- " other segments\n",name);
- rc = -EBUSY;
+ default:
+ PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n",
+ name, rc);
goto out_free;
}
+
if (do_nonshared)
dcss_command = DCSS_LOADNSR;
else
@@ -375,7 +316,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
rc = dcss_diag_translate_rc (seg->end);
dcss_diag(DCSS_PURGESEG, seg->dcss_name,
&seg->start_addr, &seg->end);
- goto out_free;
+ goto out_shared;
}
seg->do_nonshared = do_nonshared;
atomic_set(&seg->ref_count, 1);
@@ -394,6 +335,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
(void*)seg->start_addr, (void*)seg->end,
segtype_string[seg->vm_segtype]);
goto out;
+ out_shared:
+ remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
out_free:
kfree(seg);
out:
@@ -429,7 +372,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr,
if (!MACHINE_IS_VM)
return -ENOSYS;
- spin_lock (&dcss_lock);
+ mutex_lock(&dcss_lock);
seg = segment_by_name (name);
if (seg == NULL)
rc = __segment_load (name, do_nonshared, addr, end);
@@ -444,7 +387,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr,
rc = -EPERM;
}
}
- spin_unlock (&dcss_lock);
+ mutex_unlock(&dcss_lock);
return rc;
}
@@ -467,7 +410,7 @@ segment_modify_shared (char *name, int do_nonshared)
unsigned long dummy;
int dcss_command, rc, diag_cc;
- spin_lock (&dcss_lock);
+ mutex_lock(&dcss_lock);
seg = segment_by_name (name);
if (seg == NULL) {
rc = -EINVAL;
@@ -508,7 +451,7 @@ segment_modify_shared (char *name, int do_nonshared)
&dummy, &dummy);
kfree(seg);
out_unlock:
- spin_unlock(&dcss_lock);
+ mutex_unlock(&dcss_lock);
return rc;
}
@@ -526,21 +469,21 @@ segment_unload(char *name)
if (!MACHINE_IS_VM)
return;
- spin_lock(&dcss_lock);
+ mutex_lock(&dcss_lock);
seg = segment_by_name (name);
if (seg == NULL) {
PRINT_ERR ("could not find segment %s in segment_unload, "
"please report to linux390@de.ibm.com\n",name);
goto out_unlock;
}
- if (atomic_dec_return(&seg->ref_count) == 0) {
- list_del(&seg->list);
- dcss_diag(DCSS_PURGESEG, seg->dcss_name,
- &dummy, &dummy);
- kfree(seg);
- }
+ if (atomic_dec_return(&seg->ref_count) != 0)
+ goto out_unlock;
+ remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+ list_del(&seg->list);
+ dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
+ kfree(seg);
out_unlock:
- spin_unlock(&dcss_lock);
+ mutex_unlock(&dcss_lock);
}
/*
@@ -559,12 +502,13 @@ segment_save(char *name)
if (!MACHINE_IS_VM)
return;
- spin_lock(&dcss_lock);
+ mutex_lock(&dcss_lock);
seg = segment_by_name (name);
if (seg == NULL) {
- PRINT_ERR ("could not find segment %s in segment_save, please report to linux390@de.ibm.com\n",name);
- return;
+ PRINT_ERR("could not find segment %s in segment_save, please "
+ "report to linux390@de.ibm.com\n", name);
+ goto out;
}
startpfn = seg->start_addr >> PAGE_SHIFT;
@@ -591,7 +535,7 @@ segment_save(char *name)
goto out;
}
out:
- spin_unlock(&dcss_lock);
+ mutex_unlock(&dcss_lock);
}
EXPORT_SYMBOL(segment_load);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 1c323bbfda91..cd85e34d8703 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -31,6 +31,7 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/kdebug.h>
+#include <asm/s390_ext.h>
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
@@ -394,6 +395,7 @@ void do_dat_exception(struct pt_regs *regs, unsigned long error_code)
/*
* 'pfault' pseudo page faults routines.
*/
+static ext_int_info_t ext_int_pfault;
static int pfault_disable = 0;
static int __init nopfault(char *str)
@@ -422,7 +424,7 @@ int pfault_init(void)
__PF_RES_FIELD };
int rc;
- if (pfault_disable)
+ if (!MACHINE_IS_VM || pfault_disable)
return -1;
asm volatile(
" diag %1,%0,0x258\n"
@@ -440,7 +442,7 @@ void pfault_fini(void)
pfault_refbk_t refbk =
{ 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };
- if (pfault_disable)
+ if (!MACHINE_IS_VM || pfault_disable)
return;
__ctl_clear_bit(0,9);
asm volatile(
@@ -500,5 +502,25 @@ pfault_interrupt(__u16 error_code)
set_tsk_need_resched(tsk);
}
}
-#endif
+void __init pfault_irq_init(void)
+{
+ if (!MACHINE_IS_VM)
+ return;
+
+ /*
+ * Try to get pfault pseudo page faults going.
+ */
+ if (register_early_external_interrupt(0x2603, pfault_interrupt,
+ &ext_int_pfault) != 0)
+ panic("Couldn't request external interrupt 0x2603");
+
+ if (pfault_init() == 0)
+ return;
+
+ /* Tough luck, no pfault. */
+ pfault_disable = 1;
+ unregister_early_external_interrupt(0x2603, pfault_interrupt,
+ &ext_int_pfault);
+}
+#endif
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index e1881c31b1cb..4bb21be3b007 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -24,6 +24,7 @@
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/pfn.h>
+#include <linux/poison.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -69,6 +70,8 @@ void show_mem(void)
printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
i = max_mapnr;
while (i-- > 0) {
+ if (!pfn_valid(i))
+ continue;
page = pfn_to_page(i);
total++;
if (PageReserved(page))
@@ -84,150 +87,52 @@ void show_mem(void)
printk("%d pages swap cached\n",cached);
}
-extern unsigned long __initdata zholes_size[];
-/*
- * paging_init() sets up the page tables
- */
-
-#ifndef CONFIG_64BIT
-void __init paging_init(void)
+static void __init setup_ro_region(void)
{
- pgd_t * pg_dir;
- pte_t * pg_table;
- pte_t pte;
- int i;
- unsigned long tmp;
- unsigned long pfn = 0;
- unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
- static const int ssm_mask = 0x04000000L;
- unsigned long ro_start_pfn, ro_end_pfn;
- unsigned long zones_size[MAX_NR_ZONES];
-
- ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
- ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
-
- memset(zones_size, 0, sizeof(zones_size));
- zones_size[ZONE_DMA] = max_low_pfn;
- free_area_init_node(0, &contig_page_data, zones_size,
- __pa(PAGE_OFFSET) >> PAGE_SHIFT,
- zholes_size);
-
- /* unmap whole virtual address space */
-
- pg_dir = swapper_pg_dir;
-
- for (i = 0; i < PTRS_PER_PGD; i++)
- pmd_clear((pmd_t *) pg_dir++);
-
- /*
- * map whole physical memory to virtual memory (identity mapping)
- */
-
- pg_dir = swapper_pg_dir;
-
- while (pfn < max_low_pfn) {
- /*
- * pg_table is physical at this point
- */
- pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
-
- pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table);
- pg_dir++;
-
- for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
- if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
- pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
- else
- pte = pfn_pte(pfn, PAGE_KERNEL);
- if (pfn >= max_low_pfn)
- pte_val(pte) = _PAGE_TYPE_EMPTY;
- set_pte(pg_table, pte);
- pfn++;
- }
- }
-
- S390_lowcore.kernel_asce = pgdir_k;
-
- /* enable virtual mapping in kernel mode */
- __ctl_load(pgdir_k, 1, 1);
- __ctl_load(pgdir_k, 7, 7);
- __ctl_load(pgdir_k, 13, 13);
- __raw_local_irq_ssm(ssm_mask);
-
- local_flush_tlb();
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ pte_t new_pte;
+ unsigned long address, end;
+
+ address = ((unsigned long)&__start_rodata) & PAGE_MASK;
+ end = PFN_ALIGN((unsigned long)&__end_rodata);
+
+ for (; address < end; address += PAGE_SIZE) {
+ pgd = pgd_offset_k(address);
+ pmd = pmd_offset(pgd, address);
+ pte = pte_offset_kernel(pmd, address);
+ new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
+ set_pte(pte, new_pte);
+ }
}
-#else /* CONFIG_64BIT */
+extern void vmem_map_init(void);
+/*
+ * paging_init() sets up the page tables
+ */
void __init paging_init(void)
{
- pgd_t * pg_dir;
- pmd_t * pm_dir;
- pte_t * pt_dir;
- pte_t pte;
- int i,j,k;
- unsigned long pfn = 0;
- unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) |
- _KERN_REGION_TABLE;
+ pgd_t *pg_dir;
+ int i;
+ unsigned long pgdir_k;
static const int ssm_mask = 0x04000000L;
- unsigned long zones_size[MAX_NR_ZONES];
- unsigned long dma_pfn, high_pfn;
- unsigned long ro_start_pfn, ro_end_pfn;
-
- memset(zones_size, 0, sizeof(zones_size));
- dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
- high_pfn = max_low_pfn;
- ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
- ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
-
- if (dma_pfn > high_pfn)
- zones_size[ZONE_DMA] = high_pfn;
- else {
- zones_size[ZONE_DMA] = dma_pfn;
- zones_size[ZONE_NORMAL] = high_pfn - dma_pfn;
- }
-
- /* Initialize mem_map[]. */
- free_area_init_node(0, &contig_page_data, zones_size,
- __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
- /*
- * map whole physical memory to virtual memory (identity mapping)
- */
-
- pg_dir = swapper_pg_dir;
-
- for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) {
+ pg_dir = swapper_pg_dir;
- if (pfn >= max_low_pfn) {
- pgd_clear(pg_dir);
- continue;
- }
-
- pm_dir = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE * 4);
- pgd_populate(&init_mm, pg_dir, pm_dir);
-
- for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) {
- if (pfn >= max_low_pfn) {
- pmd_clear(pm_dir);
- continue;
- }
-
- pt_dir = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
- pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
-
- for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) {
- if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
- pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
- else
- pte = pfn_pte(pfn, PAGE_KERNEL);
- if (pfn >= max_low_pfn)
- pte_val(pte) = _PAGE_TYPE_EMPTY;
- set_pte(pt_dir, pte);
- pfn++;
- }
- }
- }
+#ifdef CONFIG_64BIT
+ pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE;
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ pgd_clear(pg_dir + i);
+#else
+ pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ pmd_clear((pmd_t *)(pg_dir + i));
+#endif
+ vmem_map_init();
+ setup_ro_region();
S390_lowcore.kernel_asce = pgdir_k;
@@ -237,9 +142,11 @@ void __init paging_init(void)
__ctl_load(pgdir_k, 13, 13);
__raw_local_irq_ssm(ssm_mask);
- local_flush_tlb();
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+ max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+ free_area_init_nodes(max_zone_pfns);
}
-#endif /* CONFIG_64BIT */
void __init mem_init(void)
{
@@ -269,6 +176,8 @@ void __init mem_init(void)
printk("Write protected kernel read-only data: %#lx - %#lx\n",
(unsigned long)&__start_rodata,
PFN_ALIGN((unsigned long)&__end_rodata) - 1);
+ printk("Virtual memmap size: %ldk\n",
+ (max_pfn * sizeof(struct page)) >> 10);
}
void free_initmem(void)
@@ -279,6 +188,7 @@ void free_initmem(void)
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
+ memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
free_page(addr);
totalram_pages++;
}
diff --git a/arch/s390/mm/ioremap.c b/arch/s390/mm/ioremap.c
index 0f6e9ecbefe2..3d2100a4e209 100644
--- a/arch/s390/mm/ioremap.c
+++ b/arch/s390/mm/ioremap.c
@@ -15,87 +15,8 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
-#include <asm/io.h>
+#include <linux/io.h>
#include <asm/pgalloc.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
-{
- unsigned long end;
- unsigned long pfn;
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- if (address >= end)
- BUG();
- pfn = phys_addr >> PAGE_SHIFT;
- do {
- if (!pte_none(*pte)) {
- printk("remap_area_pte: page already exists\n");
- BUG();
- }
- set_pte(pte, pfn_pte(pfn, __pgprot(flags)));
- address += PAGE_SIZE;
- pfn++;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
-{
- unsigned long end;
-
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- phys_addr -= address;
- if (address >= end)
- BUG();
- do {
- pte_t * pte = pte_alloc_kernel(pmd, address);
- if (!pte)
- return -ENOMEM;
- remap_area_pte(pte, address, end - address, address + phys_addr, flags);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
- return 0;
-}
-
-static int remap_area_pages(unsigned long address, unsigned long phys_addr,
- unsigned long size, unsigned long flags)
-{
- int error;
- pgd_t * dir;
- unsigned long end = address + size;
-
- phys_addr -= address;
- dir = pgd_offset(&init_mm, address);
- flush_cache_all();
- if (address >= end)
- BUG();
- do {
- pmd_t *pmd;
- pmd = pmd_alloc(&init_mm, dir, address);
- error = -ENOMEM;
- if (!pmd)
- break;
- if (remap_area_pmd(pmd, address, end - address,
- phys_addr + address, flags))
- break;
- error = 0;
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
- } while (address && (address < end));
- flush_tlb_all();
- return 0;
-}
/*
* Generic mapping function (not visible outside):
@@ -122,7 +43,8 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag
if (!area)
return NULL;
addr = area->addr;
- if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
+ if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
+ phys_addr, __pgprot(flags))) {
vfree(addr);
return NULL;
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
new file mode 100644
index 000000000000..cd3d93e8c211
--- /dev/null
+++ b/arch/s390/mm/vmem.c
@@ -0,0 +1,382 @@
+/*
+ * arch/s390/mm/vmem.c
+ *
+ * Copyright IBM Corp. 2006
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/bootmem.h>
+#include <linux/pfn.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
+
+unsigned long vmalloc_end;
+EXPORT_SYMBOL(vmalloc_end);
+
+static struct page *vmem_map;
+static DEFINE_MUTEX(vmem_mutex);
+
+struct memory_segment {
+ struct list_head list;
+ unsigned long start;
+ unsigned long size;
+};
+
+static LIST_HEAD(mem_segs);
+
+void memmap_init(unsigned long size, int nid, unsigned long zone,
+ unsigned long start_pfn)
+{
+ struct page *start, *end;
+ struct page *map_start, *map_end;
+ int i;
+
+ start = pfn_to_page(start_pfn);
+ end = start + size;
+
+ for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+ unsigned long cstart, cend;
+
+ cstart = PFN_DOWN(memory_chunk[i].addr);
+ cend = cstart + PFN_DOWN(memory_chunk[i].size);
+
+ map_start = mem_map + cstart;
+ map_end = mem_map + cend;
+
+ if (map_start < start)
+ map_start = start;
+ if (map_end > end)
+ map_end = end;
+
+ map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1))
+ / sizeof(struct page);
+ map_end += ((PFN_ALIGN((unsigned long) map_end)
+ - (unsigned long) map_end)
+ / sizeof(struct page));
+
+ if (map_start < map_end)
+ memmap_init_zone((unsigned long)(map_end - map_start),
+ nid, zone, page_to_pfn(map_start),
+ MEMMAP_EARLY);
+ }
+}
+
+static inline void *vmem_alloc_pages(unsigned int order)
+{
+ if (slab_is_available())
+ return (void *)__get_free_pages(GFP_KERNEL, order);
+ return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
+}
+
+static inline pmd_t *vmem_pmd_alloc(void)
+{
+ pmd_t *pmd;
+ int i;
+
+ pmd = vmem_alloc_pages(PMD_ALLOC_ORDER);
+ if (!pmd)
+ return NULL;
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ pmd_clear(pmd + i);
+ return pmd;
+}
+
+static inline pte_t *vmem_pte_alloc(void)
+{
+ pte_t *pte;
+ pte_t empty_pte;
+ int i;
+
+ pte = vmem_alloc_pages(PTE_ALLOC_ORDER);
+ if (!pte)
+ return NULL;
+ pte_val(empty_pte) = _PAGE_TYPE_EMPTY;
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(pte + i, empty_pte);
+ return pte;
+}
+
+/*
+ * Add a physical memory range to the 1:1 mapping.
+ */
+static int vmem_add_range(unsigned long start, unsigned long size)
+{
+ unsigned long address;
+ pgd_t *pg_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+ pte_t pte;
+ int ret = -ENOMEM;
+
+ for (address = start; address < start + size; address += PAGE_SIZE) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ pm_dir = vmem_pmd_alloc();
+ if (!pm_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, pm_dir);
+ }
+
+ pm_dir = pmd_offset(pg_dir, address);
+ if (pmd_none(*pm_dir)) {
+ pt_dir = vmem_pte_alloc();
+ if (!pt_dir)
+ goto out;
+ pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
+ }
+
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte(pt_dir, pte);
+ }
+ ret = 0;
+out:
+ flush_tlb_kernel_range(start, start + size);
+ return ret;
+}
+
+/*
+ * Remove a physical memory range from the 1:1 mapping.
+ * Currently only invalidates page table entries.
+ */
+static void vmem_remove_range(unsigned long start, unsigned long size)
+{
+ unsigned long address;
+ pgd_t *pg_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+ pte_t pte;
+
+ pte_val(pte) = _PAGE_TYPE_EMPTY;
+ for (address = start; address < start + size; address += PAGE_SIZE) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir))
+ continue;
+ pm_dir = pmd_offset(pg_dir, address);
+ if (pmd_none(*pm_dir))
+ continue;
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ set_pte(pt_dir, pte);
+ }
+ flush_tlb_kernel_range(start, start + size);
+}
+
+/*
+ * Add a backed mem_map array to the virtual mem_map array.
+ */
+static int vmem_add_mem_map(unsigned long start, unsigned long size)
+{
+ unsigned long address, start_addr, end_addr;
+ struct page *map_start, *map_end;
+ pgd_t *pg_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+ pte_t pte;
+ int ret = -ENOMEM;
+
+ map_start = vmem_map + PFN_DOWN(start);
+ map_end = vmem_map + PFN_DOWN(start + size);
+
+ start_addr = (unsigned long) map_start & PAGE_MASK;
+ end_addr = PFN_ALIGN((unsigned long) map_end);
+
+ for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ pm_dir = vmem_pmd_alloc();
+ if (!pm_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, pm_dir);
+ }
+
+ pm_dir = pmd_offset(pg_dir, address);
+ if (pmd_none(*pm_dir)) {
+ pt_dir = vmem_pte_alloc();
+ if (!pt_dir)
+ goto out;
+ pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
+ }
+
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ if (pte_none(*pt_dir)) {
+ unsigned long new_page;
+
+ new_page =__pa(vmem_alloc_pages(0));
+ if (!new_page)
+ goto out;
+ pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte(pt_dir, pte);
+ }
+ }
+ ret = 0;
+out:
+ flush_tlb_kernel_range(start_addr, end_addr);
+ return ret;
+}
+
+static int vmem_add_mem(unsigned long start, unsigned long size)
+{
+ int ret;
+
+ ret = vmem_add_range(start, size);
+ if (ret)
+ return ret;
+ return vmem_add_mem_map(start, size);
+}
+
+/*
+ * Add memory segment to the segment list if it doesn't overlap with
+ * an already present segment.
+ */
+static int insert_memory_segment(struct memory_segment *seg)
+{
+ struct memory_segment *tmp;
+
+ if (PFN_DOWN(seg->start + seg->size) > max_pfn ||
+ seg->start + seg->size < seg->start)
+ return -ERANGE;
+
+ list_for_each_entry(tmp, &mem_segs, list) {
+ if (seg->start >= tmp->start + tmp->size)
+ continue;
+ if (seg->start + seg->size <= tmp->start)
+ continue;
+ return -ENOSPC;
+ }
+ list_add(&seg->list, &mem_segs);
+ return 0;
+}
+
+/*
+ * Remove memory segment from the segment list.
+ */
+static void remove_memory_segment(struct memory_segment *seg)
+{
+ list_del(&seg->list);
+}
+
+static void __remove_shared_memory(struct memory_segment *seg)
+{
+ remove_memory_segment(seg);
+ vmem_remove_range(seg->start, seg->size);
+}
+
+int remove_shared_memory(unsigned long start, unsigned long size)
+{
+ struct memory_segment *seg;
+ int ret;
+
+ mutex_lock(&vmem_mutex);
+
+ ret = -ENOENT;
+ list_for_each_entry(seg, &mem_segs, list) {
+ if (seg->start == start && seg->size == size)
+ break;
+ }
+
+ if (seg->start != start || seg->size != size)
+ goto out;
+
+ ret = 0;
+ __remove_shared_memory(seg);
+ kfree(seg);
+out:
+ mutex_unlock(&vmem_mutex);
+ return ret;
+}
+
+int add_shared_memory(unsigned long start, unsigned long size)
+{
+ struct memory_segment *seg;
+ struct page *page;
+ unsigned long pfn, num_pfn, end_pfn;
+ int ret;
+
+ mutex_lock(&vmem_mutex);
+ ret = -ENOMEM;
+ seg = kzalloc(sizeof(*seg), GFP_KERNEL);
+ if (!seg)
+ goto out;
+ seg->start = start;
+ seg->size = size;
+
+ ret = insert_memory_segment(seg);
+ if (ret)
+ goto out_free;
+
+ ret = vmem_add_mem(start, size);
+ if (ret)
+ goto out_remove;
+
+ pfn = PFN_DOWN(start);
+ num_pfn = PFN_DOWN(size);
+ end_pfn = pfn + num_pfn;
+
+ page = pfn_to_page(pfn);
+ memset(page, 0, num_pfn * sizeof(struct page));
+
+ for (; pfn < end_pfn; pfn++) {
+ page = pfn_to_page(pfn);
+ init_page_count(page);
+ reset_page_mapcount(page);
+ SetPageReserved(page);
+ INIT_LIST_HEAD(&page->lru);
+ }
+ goto out;
+
+out_remove:
+ __remove_shared_memory(seg);
+out_free:
+ kfree(seg);
+out:
+ mutex_unlock(&vmem_mutex);
+ return ret;
+}
+
+/*
+ * map whole physical memory to virtual memory (identity mapping)
+ */
+void __init vmem_map_init(void)
+{
+ unsigned long map_size;
+ int i;
+
+ map_size = ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page);
+ vmalloc_end = PFN_ALIGN(VMALLOC_END_INIT) - PFN_ALIGN(map_size);
+ vmem_map = (struct page *) vmalloc_end;
+ NODE_DATA(0)->node_mem_map = vmem_map;
+
+ for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
+ vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
+}
+
+/*
+ * Convert memory chunk array to a memory segment list so there is a single
+ * list that contains both r/w memory and shared memory segments.
+ */
+static int __init vmem_convert_memory_chunk(void)
+{
+ struct memory_segment *seg;
+ int i;
+
+ mutex_lock(&vmem_mutex);
+ for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+ if (!memory_chunk[i].size)
+ continue;
+ seg = kzalloc(sizeof(*seg), GFP_KERNEL);
+ if (!seg)
+ panic("Out of memory...\n");
+ seg->start = memory_chunk[i].addr;
+ seg->size = memory_chunk[i].size;
+ insert_memory_segment(seg);
+ }
+ mutex_unlock(&vmem_mutex);
+ return 0;
+}
+
+core_initcall(vmem_convert_memory_chunk);