summaryrefslogtreecommitdiff
path: root/arch/s390/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--arch/s390/mm/fault.c45
-rw-r--r--arch/s390/mm/hugetlbpage.c2
-rw-r--r--arch/s390/mm/maccess.c67
-rw-r--r--arch/s390/mm/pgtable.c2
4 files changed, 98 insertions, 18 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 46ef3fd0663b..72cec9ecd96c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -294,7 +294,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE
- if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
+ if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
address = __gmap_fault(address,
(struct gmap *) S390_lowcore.gmap);
if (address == -EFAULT) {
@@ -549,19 +549,15 @@ static void pfault_interrupt(struct ext_code ext_code,
if ((subcode & 0xff00) != __SUBCODE_MASK)
return;
kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
- if (subcode & 0x0080) {
- /* Get the token (= pid of the affected task). */
- pid = sizeof(void *) == 4 ? param32 : param64;
- rcu_read_lock();
- tsk = find_task_by_pid_ns(pid, &init_pid_ns);
- if (tsk)
- get_task_struct(tsk);
- rcu_read_unlock();
- if (!tsk)
- return;
- } else {
- tsk = current;
- }
+ /* Get the token (= pid of the affected task). */
+ pid = sizeof(void *) == 4 ? param32 : param64;
+ rcu_read_lock();
+ tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+ if (tsk)
+ get_task_struct(tsk);
+ rcu_read_unlock();
+ if (!tsk)
+ return;
spin_lock(&pfault_lock);
if (subcode & 0x0080) {
/* signal bit is set -> a page has been swapped in by VM */
@@ -574,6 +570,7 @@ static void pfault_interrupt(struct ext_code ext_code,
tsk->thread.pfault_wait = 0;
list_del(&tsk->thread.list);
wake_up_process(tsk);
+ put_task_struct(tsk);
} else {
/* Completion interrupt was faster than initial
* interrupt. Set pfault_wait to -1 so the initial
@@ -585,24 +582,35 @@ static void pfault_interrupt(struct ext_code ext_code,
if (tsk->state == TASK_RUNNING)
tsk->thread.pfault_wait = -1;
}
- put_task_struct(tsk);
} else {
/* signal bit not set -> a real page is missing. */
- if (tsk->thread.pfault_wait == -1) {
+ if (WARN_ON_ONCE(tsk != current))
+ goto out;
+ if (tsk->thread.pfault_wait == 1) {
+ /* Already on the list with a reference: put to sleep */
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_tsk_need_resched(tsk);
+ } else if (tsk->thread.pfault_wait == -1) {
/* Completion interrupt was faster than the initial
* interrupt (pfault_wait == -1). Set pfault_wait
* back to zero and exit. */
tsk->thread.pfault_wait = 0;
} else {
/* Initial interrupt arrived before completion
- * interrupt. Let the task sleep. */
+ * interrupt. Let the task sleep.
+ * An extra task reference is needed since a different
+ * cpu may set the task state to TASK_RUNNING again
+ * before the scheduler is reached. */
+ get_task_struct(tsk);
tsk->thread.pfault_wait = 1;
list_add(&tsk->thread.list, &pfault_list);
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
set_tsk_need_resched(tsk);
}
}
+out:
spin_unlock(&pfault_lock);
+ put_task_struct(tsk);
}
static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
@@ -620,6 +628,7 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
list_del(&thread->list);
tsk = container_of(thread, struct task_struct, thread);
wake_up_process(tsk);
+ put_task_struct(tsk);
}
spin_unlock_irq(&pfault_lock);
break;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 597bb2d27c3c..900de2b3cf28 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -58,6 +58,8 @@ void arch_release_hugepage(struct page *page)
ptep = (pte_t *) page[1].index;
if (!ptep)
return;
+ clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY,
+ PTRS_PER_PTE * sizeof(pte_t));
page_table_free(&init_mm, (unsigned long *) ptep);
page[1].index = 0;
}
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index e1335dc2b1b7..795a0a9bb2eb 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/gfp.h>
+#include <linux/cpu.h>
#include <asm/ctl_reg.h>
/*
@@ -166,3 +167,69 @@ out:
free_page((unsigned long) buf);
return rc;
}
+
+/*
+ * Check if physical address is within prefix or zero page
+ */
+static int is_swapped(unsigned long addr)
+{
+ unsigned long lc;
+ int cpu;
+
+ if (addr < sizeof(struct _lowcore))
+ return 1;
+ for_each_online_cpu(cpu) {
+ lc = (unsigned long) lowcore_ptr[cpu];
+ if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Return swapped prefix or zero page address
+ */
+static unsigned long get_swapped(unsigned long addr)
+{
+ unsigned long prefix = store_prefix();
+
+ if (addr < sizeof(struct _lowcore))
+ return addr + prefix;
+ if (addr >= prefix && addr < prefix + sizeof(struct _lowcore))
+ return addr - prefix;
+ return addr;
+}
+
+/*
+ * Convert a physical pointer for /dev/mem access
+ *
+ * For swapped prefix pages a new buffer is returned that contains a copy of
+ * the absolute memory. The buffer size is maximum one page large.
+ */
+void *xlate_dev_mem_ptr(unsigned long addr)
+{
+ void *bounce = (void *) addr;
+ unsigned long size;
+
+ get_online_cpus();
+ preempt_disable();
+ if (is_swapped(addr)) {
+ size = PAGE_SIZE - (addr & ~PAGE_MASK);
+ bounce = (void *) __get_free_page(GFP_ATOMIC);
+ if (bounce)
+ memcpy_real(bounce, (void *) get_swapped(addr), size);
+ }
+ preempt_enable();
+ put_online_cpus();
+ return bounce;
+}
+
+/*
+ * Free converted buffer for /dev/mem access (if necessary)
+ */
+void unxlate_dev_mem_ptr(unsigned long addr, void *buf)
+{
+ if ((void *) addr != buf)
+ free_page((unsigned long) buf);
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 6e765bf00670..a3db5a3ea083 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -822,6 +822,8 @@ int s390_enable_sie(void)
/* we copy the mm and let dup_mm create the page tables with_pgstes */
tsk->mm->context.alloc_pgste = 1;
+ /* make sure that both mms have a correct rss state */
+ sync_mm_rss(tsk->mm);
mm = dup_mm(tsk);
tsk->mm->context.alloc_pgste = 0;
if (!mm)