From 5245c924c2191ee0f39d8586a57178baba13dbf2 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 27 Jun 2014 16:56:18 +0200 Subject: s390/qdio: add helpers to manage qdio buffers Users of qdio buffers employ different strategies to manage these buffers. The qeth driver uses huge contiguous buffers which leads to high order allocations with all their downsides. This patch provides helpers to allocate, free, and reset arrays of qdio buffers using non contiguous pages. Reviewed-by: Martin Peschke Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/qdio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index d786c634e052..06f3034605a1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -415,6 +415,10 @@ struct qdio_brinfo_entry_l2 { #define QDIO_FLAG_SYNC_OUTPUT 0x02 #define QDIO_FLAG_PCI_OUT 0x10 +int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count); +void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count); +void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count); + extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); -- cgit v1.2.3 From bd858e84d4a179c3030dccba56efb6a93e455c10 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 10 Jul 2014 18:14:20 +0200 Subject: s390/kdump: Return NOTIFY_OK for all actions other than MEM_GOING_OFFLINE We only have to check kdump memory for the MEM_GOING_OFFLINE action. Therefore skip the test and return NOTIFY_OK for all other memory hotplug actions. 
Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1e2264b46e4c..ae1d5be7dd88 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -501,6 +501,8 @@ static int kdump_mem_notifier(struct notifier_block *nb, { struct memory_notify *arg = data; + if (action != MEM_GOING_OFFLINE) + return NOTIFY_OK; if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) return NOTIFY_BAD; if (arg->start_pfn > PFN_DOWN(crashk_res.end)) -- cgit v1.2.3 From 896cb7e635ec562cd9f2dc98dea193727a50eade Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 16 Jul 2014 17:21:01 +0200 Subject: s390/pci: fix kmsg component KMSG_COMPONENT has to be defined instead of COMPONENT. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 4 ++-- arch/s390/pci/pci_clp.c | 4 ++-- arch/s390/pci/pci_debug.c | 4 ++-- arch/s390/pci/pci_event.c | 4 ++-- arch/s390/pci/pci_sysfs.c | 4 ++-- drivers/pci/hotplug/s390_pci_hpc.c | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 30de42730b2f..2fa7b14b9c08 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -15,8 +15,8 @@ * Thomas Klein */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 96545d7659fd..6e22a247de9b 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -5,8 +5,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 
c5c66840ac00..eec598c5939f 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -5,8 +5,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 6d7f5a3016ca..460fdb21cf61 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -5,8 +5,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 9190214b8702..fa3ce891e597 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -5,8 +5,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index d1332d2f8730..d77e46bca54c 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -7,8 +7,8 @@ * Jan Glauber */ -#define COMPONENT "zPCI hpc" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include #include -- cgit v1.2.3 From c60d1ae4efcb5790f7d085369baf66c167a6484f Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 18 Jul 2014 17:37:08 +0200 Subject: s390/pci: introduce lazy IOTLB flushing for DMA unmap This changes the default IOTLB flushing method to lazy flushing, which means that there will be no direct flush after each DMA unmap operation. Instead, the iommu bitmap pointer will be adjusted after unmap, so that no DMA address will be re-used until after an iommu bitmap wrap-around. The only IOTLB flush will then happen after each wrap-around. 
A new kernel parameter "s390_iommu=" is also introduced, to allow changing the flushing behaviour to the old strict method. Reviewed-by: Sebastian Ott Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 7 ++++++ arch/s390/pci/pci_dma.c | 50 ++++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b7fa2f599459..c848095f2cb0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3023,6 +3023,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. S [KNL] Run init in single mode + s390_iommu= [HW,S390] + Set s390 IOTLB flushing mode + strict + With strict flushing every unmap operation will result in + an IOTLB flush. Default is lazy flushing before reuse, + which is faster. + sa1100ir [NET] See drivers/net/irda/sa1100_ir.c. diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f91c03119804..4cbb29a4d615 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -16,6 +16,13 @@ static struct kmem_cache *dma_region_table_cache; static struct kmem_cache *dma_page_table_cache; +static int s390_iommu_strict; + +static int zpci_refresh_global(struct zpci_dev *zdev) +{ + return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, + zdev->iommu_pages * PAGE_SIZE); +} static unsigned long *dma_alloc_cpu_table(void) { @@ -155,18 +162,15 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, } /* - * rpcit is not required to establish new translations when previously - * invalid translation-table entries are validated, however it is - * required when altering previously valid entries. + * With zdev->tlb_refresh == 0, rpcit is not required to establish new + * translations when previously invalid translation-table entries are + * validated. 
With lazy unmap, it also is skipped for previously valid + * entries, but a global rpcit is then required before any address can + * be re-used, i.e. after each iommu bitmap wrap-around. */ if (!zdev->tlb_refresh && - ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) - /* - * TODO: also need to check that the old entry is indeed INVALID - * and not only for one page but for the whole range... - * -> now we WARN_ON in that case but with lazy unmap that - * needs to be redone! - */ + (!s390_iommu_strict || + ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))) goto no_refresh; rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, @@ -220,16 +224,21 @@ static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) { unsigned long offset, flags; + int wrap = 0; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); offset = __dma_alloc_iommu(zdev, zdev->next_bit, size); - if (offset == -1) + if (offset == -1) { + /* wrap-around */ offset = __dma_alloc_iommu(zdev, 0, size); + wrap = 1; + } if (offset != -1) { zdev->next_bit = offset + size; - if (zdev->next_bit >= zdev->iommu_pages) - zdev->next_bit = 0; + if (!zdev->tlb_refresh && !s390_iommu_strict && wrap) + /* global flush after wrap-around with lazy unmap */ + zpci_refresh_global(zdev); } spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); return offset; @@ -243,7 +252,11 @@ static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size if (!zdev->iommu_bitmap) goto out; bitmap_clear(zdev->iommu_bitmap, offset, size); - if (offset >= zdev->next_bit) + /* + * Lazy flush for unmap: need to move next_bit to avoid address re-use + * until wrap-around. 
+ */ + if (!s390_iommu_strict && offset >= zdev->next_bit) zdev->next_bit = offset + size; out: spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); @@ -504,3 +517,12 @@ struct dma_map_ops s390_dma_ops = { /* dma_supported is unconditionally true without a callback */ }; EXPORT_SYMBOL_GPL(s390_dma_ops); + +static int __init s390_iommu_setup(char *str) +{ + if (!strncmp(str, "strict", 6)) + s390_iommu_strict = 1; + return 0; +} + +__setup("s390_iommu=", s390_iommu_setup); -- cgit v1.2.3 From dc295880c6752076f8b94ba3885d0bfff09e3e82 Mon Sep 17 00:00:00 2001 From: Jan Willeke Date: Tue, 22 Jul 2014 16:50:57 +0200 Subject: s390/seccomp: fix error return for filtered system calls The syscall_set_return_value function of s390 negates the error argument before storing the value to the return register gpr2. This is incorrect, the seccomp code already passes the negative error value. Store the unmodified error value to gpr2. Signed-off-by: Jan Willeke Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index abad78d5b10c..5bc12598ae9e 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -54,7 +54,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->gprs[2] = error ? -error : val; + regs->gprs[2] = error ? error : val; } static inline void syscall_get_arguments(struct task_struct *task, -- cgit v1.2.3 From e2213e04c1b1e44a09a9d05b79809b7e63c9217e Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 22 Jul 2014 16:58:52 +0200 Subject: s390/irq: improve displayed interrupt order in /proc/interrupts Rework the irqclass_main_desc and irqclass_sub_desc data structures which are used to report detailed IRQ statistics in /proc/interrupts.
When called from the procfs ops, the entries in the structures are processed one by one. The index of an IRQ in the structures is identical to its definition in the "enum interruption_class". To control and (re)order the displayed sequence, introduce an irq member in each entry. This helps to display related IRQs together without changing the assigned number in the interruption_class enumeration. That means adding new IRQs and displaying them are independent of each other. Finally, this new behavior makes it easier to maintain a stable kernel ABI. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/irq.c | 95 ++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 99b0b09646ca..8eb82443cfbd 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -30,6 +30,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); EXPORT_PER_CPU_SYMBOL_GPL(irq_stat); struct irq_class { + int irq; char *name; char *desc; }; @@ -45,9 +46,9 @@ struct irq_class { * up with having a sum which accounts each interrupt twice. */ static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { - [EXT_INTERRUPT] = {.name = "EXT"}, - [IO_INTERRUPT] = {.name = "I/O"}, - [THIN_INTERRUPT] = {.name = "AIO"}, + {.irq = EXT_INTERRUPT, .name = "EXT"}, + {.irq = IO_INTERRUPT, .name = "I/O"}, + {.irq = THIN_INTERRUPT, .name = "AIO"}, }; /* @@ -56,38 +57,38 @@ static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { * In addition this list contains non external / I/O events like NMIs.
*/ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { - [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"}, - [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"}, - [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"}, - [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"}, - [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"}, - [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, - [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"}, - [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"}, - [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"}, - [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"}, - [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, - [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, - [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, - [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, - [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, - [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"}, - [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"}, - [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"}, - [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"}, - [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"}, - [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"}, - [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"}, - [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"}, - [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"}, - [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"}, - [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, - [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, - [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, - [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, - [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, - 
[NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, - [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, + {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"}, + {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"}, + {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"}, + {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"}, + {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"}, + {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, + {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"}, + {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"}, + {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"}, + {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"}, + {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, + {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, + {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, + {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, + {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"}, + {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"}, + {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, + {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, + {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, + {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, + {.irq = IRQIO_CLW, .name = "CLW", .desc = "[I/O] CLAW"}, + {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, + {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"}, + {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, + {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, + {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" }, + {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI 
Interrupt" }, + {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, + {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, + {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; void __init init_IRQ(void) @@ -116,33 +117,37 @@ void do_IRQ(struct pt_regs *regs, int irq) */ int show_interrupts(struct seq_file *p, void *v) { - int irq = *(loff_t *) v; - int cpu; + int index = *(loff_t *) v; + int cpu, irq; get_online_cpus(); - if (irq == 0) { + if (index == 0) { seq_puts(p, " "); for_each_online_cpu(cpu) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); goto out; } - if (irq < NR_IRQS) { - if (irq >= NR_IRQS_BASE) + if (index < NR_IRQS) { + if (index >= NR_IRQS_BASE) goto out; - seq_printf(p, "%s: ", irqclass_main_desc[irq].name); + /* Adjust index to process irqclass_main_desc array entries */ + index--; + seq_printf(p, "%s: ", irqclass_main_desc[index].name); + irq = irqclass_main_desc[index].irq; for_each_online_cpu(cpu) seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_putc(p, '\n'); goto out; } - for (irq = 0; irq < NR_ARCH_IRQS; irq++) { - seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); + for (index = 0; index < NR_ARCH_IRQS; index++) { + seq_printf(p, "%s: ", irqclass_sub_desc[index].name); + irq = irqclass_sub_desc[index].irq; for_each_online_cpu(cpu) seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]); - if (irqclass_sub_desc[irq].desc) - seq_printf(p, " %s", irqclass_sub_desc[irq].desc); + if (irqclass_sub_desc[index].desc) + seq_printf(p, " %s", irqclass_sub_desc[index].desc); seq_putc(p, '\n'); } out: -- cgit v1.2.3 From 55e4283c3eb1d850893f645dd695c9c75d5fa1fc Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 25 Jul 2014 14:23:29 +0200 Subject: KVM: s390/mm: Fix page table locking vs. 
split pmd lock commit ec66ad66a0de87866be347b5ecc83bd46427f53b (s390/mm: enable split page table lock for PMD level) activated the split pmd lock for s390. Turns out that we missed one place: We also have to take the pmd lock instead of the page table lock when we reallocate the page tables (==> changing entries in the PMD) during sie enablement. Cc: stable@vger.kernel.org # 3.15+ Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 37b8241ec784..f90ad8592b36 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1279,6 +1279,7 @@ static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, { unsigned long next, *table, *new; struct page *page; + spinlock_t *ptl; pmd_t *pmd; pmd = pmd_offset(pud, addr); @@ -1296,7 +1297,7 @@ again: if (!new) return -ENOMEM; - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (likely((unsigned long *) pmd_deref(*pmd) == table)) { /* Nuke pmd entry pointing to the "short" page table */ pmdp_flush_lazy(mm, addr, pmd); @@ -1310,7 +1311,7 @@ again: page_table_free_rcu(tlb, table); new = NULL; } - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); if (new) { page_table_free_pgste(new); goto again; -- cgit v1.2.3 From 152125b7a882df36a55a8eadbea6d0edf1461ee7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 24 Jul 2014 11:03:41 +0200 Subject: s390/mm: implement dirty bits for large segment table entries The large segment table entry format has block of bits for the ACC/F values for the large page. These bits are valid only if another bit (AV bit 0x10000) of the segment table entry is set. The ACC/F bits do not have a meaning if the AV bit is off. This allows to put the THP splitting bit, the segment young bit and the new segment dirty bit into the ACC/F bits as long as the AV bit stays off. 
The dirty and young information is only available if the pmd is large. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 197 ++++++++++++++++++++++------------------ arch/s390/mm/hugetlbpage.c | 103 ++++++++++----------- arch/s390/mm/pgtable.c | 3 + 3 files changed, 159 insertions(+), 144 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fcba5e03839f..b76317c1f3eb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -287,7 +287,14 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ -#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT + +#define _SEGMENT_ENTRY_DIRTY 0 /* No sw dirty bit for 31-bit */ +#define _SEGMENT_ENTRY_YOUNG 0 /* No sw young bit for 31-bit */ +#define _SEGMENT_ENTRY_READ 0 /* No sw read bit for 31-bit */ +#define _SEGMENT_ENTRY_WRITE 0 /* No sw write bit for 31-bit */ +#define _SEGMENT_ENTRY_LARGE 0 /* No large pages for 31-bit */ +#define _SEGMENT_ENTRY_BITS_LARGE 0 +#define _SEGMENT_ENTRY_ORIGIN_LARGE 0 #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) @@ -350,7 +357,7 @@ extern unsigned long MODULES_END; /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL -#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ @@ -359,30 +366,34 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY (0) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) -#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ 
-#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ -#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ -#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */ -#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG +#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ +#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ +#define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */ +#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ +#define _SEGMENT_ENTRY_CO 0x0100 /* change-recording override */ +#define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ +#define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ /* * Segment table entry encoding (R = read-only, I = invalid, y = young bit): - * ..R...I...y. - * prot-none, old ..0...1...1. - * prot-none, young ..1...1...1. - * read-only, old ..1...1...0. - * read-only, young ..1...0...1. - * read-write, old ..0...1...0. - * read-write, young ..0...0...1. + * dy..R...I...wr + * prot-none, clean, old 00..1...1...00 + * prot-none, clean, young 01..1...1...00 + * prot-none, dirty, old 10..1...1...00 + * prot-none, dirty, young 11..1...1...00 + * read-only, clean, old 00..1...1...01 + * read-only, clean, young 01..1...0...01 + * read-only, dirty, old 10..1...1...01 + * read-only, dirty, young 11..1...0...01 + * read-write, clean, old 00..1...1...11 + * read-write, clean, young 01..1...0...11 + * read-write, dirty, old 10..0...1...11 + * read-write, dirty, young 11..0...0...11 * The segment table origin is used to distinguish empty (origin==0) from * read-write, old segment table entries (origin!=0) */ -#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ - -/* Set of bits not changed in pmd_modify */ -#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ - | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) +#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ /* Page status table bits for virtualization */ 
#define PGSTE_ACC_BITS 0xf000000000000000UL @@ -455,10 +466,11 @@ extern unsigned long MODULES_END; * Segment entry (large page) protection definitions. */ #define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ - _SEGMENT_ENTRY_NONE) -#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT | \ + _SEGMENT_ENTRY_READ) +#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_WRITE) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -569,25 +581,23 @@ static inline int pmd_none(pmd_t pmd) static inline int pmd_large(pmd_t pmd) { -#ifdef CONFIG_64BIT return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; -#else - return 0; -#endif } -static inline int pmd_prot_none(pmd_t pmd) +static inline int pmd_pfn(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) && - (pmd_val(pmd) & _SEGMENT_ENTRY_NONE); + unsigned long origin_mask; + + origin_mask = _SEGMENT_ENTRY_ORIGIN; + if (pmd_large(pmd)) + origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; + return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } static inline int pmd_bad(pmd_t pmd) { -#ifdef CONFIG_64BIT if (pmd_large(pmd)) return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; -#endif return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } @@ -607,20 +617,22 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { - if (pmd_prot_none(pmd)) - return 0; - return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; + return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; +} + +static inline int pmd_dirty(pmd_t pmd) +{ + int dirty = 1; + if (pmd_large(pmd)) + dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; + return dirty; } static inline int pmd_young(pmd_t pmd) { - int young = 0; -#ifdef CONFIG_64BIT - if (pmd_prot_none(pmd)) - young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0; - else + int young = 
1; + if (pmd_large(pmd)) young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; -#endif return young; } @@ -1391,7 +1403,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) +#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* Find an entry in the lowest level page table.. */ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) @@ -1413,41 +1425,75 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) return pgprot_val(SEGMENT_WRITE); } -static inline pmd_t pmd_mkyoung(pmd_t pmd) +static inline pmd_t pmd_wrprotect(pmd_t pmd) { -#ifdef CONFIG_64BIT - if (pmd_prot_none(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + return pmd; +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE; + if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) + return pmd; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + return pmd; +} + +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + if (pmd_large(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY; pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - } else { + } + return pmd; +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + if (pmd_large(pmd)) { + pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY; + if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + } + return pmd; +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ + if (pmd_large(pmd)) { pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; } -#endif return pmd; } static inline pmd_t pmd_mkold(pmd_t pmd) { -#ifdef CONFIG_64BIT - if (pmd_prot_none(pmd)) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; - } else { + if (pmd_large(pmd)) { pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; 
pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; } -#endif return pmd; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - int young; - - young = pmd_young(pmd); - pmd_val(pmd) &= _SEGMENT_CHG_MASK; + if (pmd_large(pmd)) { + pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | + _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | + _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT; + pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + return pmd; + } + pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= massage_pgprot_pmd(newprot); - if (young) - pmd = pmd_mkyoung(pmd); return pmd; } @@ -1455,16 +1501,9 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { pmd_t __pmd; pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return pmd_mkyoung(__pmd); + return __pmd; } -static inline pmd_t pmd_mkwrite(pmd_t pmd) -{ - /* Do not clobber PROT_NONE segments! */ - if (!pmd_prot_none(pmd)) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; - return pmd; -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ static inline void __pmdp_csp(pmd_t *pmdp) @@ -1555,34 +1594,21 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline int pmd_trans_splitting(pmd_t pmd) { - return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) && + (pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT); } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1) - pmd_val(entry) |= _SEGMENT_ENTRY_CO; *pmdp = entry; } static inline pmd_t pmd_mkhuge(pmd_t pmd) { pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - return pmd; -} - -static inline pmd_t pmd_wrprotect(pmd_t pmd) -{ - /* Do not clobber PROT_NONE segments! 
*/ - if (!pmd_prot_none(pmd)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; -} - -static inline pmd_t pmd_mkdirty(pmd_t pmd) -{ - /* No dirty bit in the segment table entry. */ + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1647,11 +1673,6 @@ static inline int has_transparent_hugepage(void) { return MACHINE_HAS_HPAGE ? 1 : 0; } - -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - return pmd_val(pmd) >> PAGE_SHIFT; -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 0ff66a7e29bb..389bc17934b7 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -10,42 +10,33 @@ static inline pmd_t __pte_to_pmd(pte_t pte) { - int none, young, prot; pmd_t pmd; /* - * Convert encoding pte bits pmd bits - * .IR...wrdytp ..R...I...y. - * empty .10...000000 -> ..0...1...0. - * prot-none, clean, old .11...000001 -> ..0...1...1. - * prot-none, clean, young .11...000101 -> ..1...1...1. - * prot-none, dirty, old .10...001001 -> ..0...1...1. - * prot-none, dirty, young .10...001101 -> ..1...1...1. - * read-only, clean, old .11...010001 -> ..1...1...0. - * read-only, clean, young .01...010101 -> ..1...0...1. - * read-only, dirty, old .11...011001 -> ..1...1...0. - * read-only, dirty, young .01...011101 -> ..1...0...1. - * read-write, clean, old .11...110001 -> ..0...1...0. - * read-write, clean, young .01...110101 -> ..0...0...1. - * read-write, dirty, old .10...111001 -> ..0...1...0. - * read-write, dirty, young .00...111101 -> ..0...0...1. - * Huge ptes are dirty by definition, a clean pte is made dirty - * by the conversion. 
+ * Convert encoding pte bits pmd bits + * .IR...wrdytp dy..R...I...wr + * empty .10...000000 -> 00..0...1...00 + * prot-none, clean, old .11...000001 -> 00..1...1...00 + * prot-none, clean, young .11...000101 -> 01..1...1...00 + * prot-none, dirty, old .10...001001 -> 10..1...1...00 + * prot-none, dirty, young .10...001101 -> 11..1...1...00 + * read-only, clean, old .11...010001 -> 00..1...1...01 + * read-only, clean, young .01...010101 -> 01..1...0...01 + * read-only, dirty, old .11...011001 -> 10..1...1...01 + * read-only, dirty, young .01...011101 -> 11..1...0...01 + * read-write, clean, old .11...110001 -> 00..0...1...11 + * read-write, clean, young .01...110101 -> 01..0...0...11 + * read-write, dirty, old .10...111001 -> 10..0...1...11 + * read-write, dirty, young .00...111101 -> 11..0...0...11 */ if (pte_present(pte)) { pmd_val(pmd) = pte_val(pte) & PAGE_MASK; - if (pte_val(pte) & _PAGE_INVALID) - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - none = (pte_val(pte) & _PAGE_PRESENT) && - !(pte_val(pte) & _PAGE_READ) && - !(pte_val(pte) & _PAGE_WRITE); - prot = (pte_val(pte) & _PAGE_PROTECT) && - !(pte_val(pte) & _PAGE_WRITE); - young = pte_val(pte) & _PAGE_YOUNG; - if (none || young) - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - if (prot || (none && young)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT); + pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; } else pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; return pmd; @@ -56,34 +47,31 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) pte_t pte; /* - * Convert encoding pmd bits pte bits - * ..R...I...y. .IR...wrdytp - * empty ..0...1...0. -> .10...000000 - * prot-none, old ..0...1...1. -> .10...001001 - * prot-none, young ..1...1...1. 
-> .10...001101 - * read-only, old ..1...1...0. -> .11...011001 - * read-only, young ..1...0...1. -> .01...011101 - * read-write, old ..0...1...0. -> .10...111001 - * read-write, young ..0...0...1. -> .00...111101 - * Huge ptes are dirty by definition + * Convert encoding pmd bits pte bits + * dy..R...I...wr .IR...wrdytp + * empty 00..0...1...00 -> .10...001100 + * prot-none, clean, old 00..0...1...00 -> .10...000001 + * prot-none, clean, young 01..0...1...00 -> .10...000101 + * prot-none, dirty, old 10..0...1...00 -> .10...001001 + * prot-none, dirty, young 11..0...1...00 -> .10...001101 + * read-only, clean, old 00..1...1...01 -> .11...010001 + * read-only, clean, young 01..1...1...01 -> .11...010101 + * read-only, dirty, old 10..1...1...01 -> .11...011001 + * read-only, dirty, young 11..1...1...01 -> .11...011101 + * read-write, clean, old 00..0...1...11 -> .10...110001 + * read-write, clean, young 01..0...1...11 -> .10...110101 + * read-write, dirty, old 10..0...1...11 -> .10...111001 + * read-write, dirty, young 11..0...1...11 -> .10...111101 */ if (pmd_present(pmd)) { - pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY | - (pmd_val(pmd) & PAGE_MASK); - if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) - pte_val(pte) |= _PAGE_INVALID; - if (pmd_prot_none(pmd)) { - if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) - pte_val(pte) |= _PAGE_YOUNG; - } else { - pte_val(pte) |= _PAGE_READ; - if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) - pte_val(pte) |= _PAGE_PROTECT; - else - pte_val(pte) |= _PAGE_WRITE; - if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) - pte_val(pte) |= _PAGE_YOUNG; - } + pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; + pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); + pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) 
<< 10; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; } else pte_val(pte) = _PAGE_INVALID; return pte; @@ -96,6 +84,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pmd = __pte_to_pmd(pte); if (!MACHINE_HAS_HPAGE) { + /* Emulated huge ptes loose the dirty and young bit */ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= pte_page(pte)[1].index; } else @@ -113,6 +102,8 @@ pte_t huge_ptep_get(pte_t *ptep) origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= *(unsigned long *) origin; + /* Emulated huge ptes are young and dirty by definition */ + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY; } return __pmd_to_pte(pmd); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index f90ad8592b36..19daa53a3da4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1433,6 +1433,9 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, { VM_BUG_ON(address & ~HPAGE_PMD_MASK); + entry = pmd_mkyoung(entry); + if (dirty) + entry = pmd_mkdirty(entry); if (pmd_same(*pmdp, entry)) return 0; pmdp_invalidate(vma, address, pmdp); -- cgit v1.2.3 From 36e7fdaa1a04fcf65b864232e1af56a51c7814d6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 5 Aug 2014 09:57:51 +0200 Subject: s390/locking: Reenable optimistic spinning commit 4badad352a6bb202ec68afa7a574c0bb961e5ebc (locking/mutex: Disable optimistic spinning on some architectures) fenced spinning for architectures without proper cmpxchg. There is no need to disable mutex spinning on s390, though: The instructions CS,CSG and friends provide the proper guarantees. (We don't implement cmpxchg with locks).
Signed-off-by: Christian Borntraeger Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bb63499fc5d3..9f00f9301613 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -92,6 +92,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT -- cgit v1.2.3