summaryrefslogtreecommitdiff
path: root/arch/s390/pci
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/pci')
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c89
-rw-r--r--arch/s390/pci/pci_bus.c47
-rw-r--r--arch/s390/pci/pci_bus.h7
-rw-r--r--arch/s390/pci/pci_clp.c7
-rw-r--r--arch/s390/pci/pci_event.c102
-rw-r--r--arch/s390/pci/pci_fixup.c23
-rw-r--r--arch/s390/pci/pci_insn.c12
-rw-r--r--arch/s390/pci/pci_iov.c56
-rw-r--r--arch/s390/pci/pci_iov.h7
-rw-r--r--arch/s390/pci/pci_mmio.c36
-rw-r--r--arch/s390/pci/pci_report.c158
-rw-r--r--arch/s390/pci/pci_report.h16
-rw-r--r--arch/s390/pci/pci_sysfs.c23
14 files changed, 475 insertions, 110 deletions
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 2c21f0394c9a..1810e0944a4e 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -5,6 +5,6 @@
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
- pci_bus.o pci_kvm_hook.o
+ pci_bus.o pci_kvm_hook.o pci_report.o pci_fixup.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
obj-$(CONFIG_SYSFS) += pci_sysfs.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 88f72745fa59..cd6676c2d602 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -31,6 +31,7 @@
#include <linux/lockdep.h>
#include <linux/list_sort.h>
+#include <asm/machine.h>
#include <asm/isc.h>
#include <asm/airq.h>
#include <asm/facility.h>
@@ -44,6 +45,7 @@
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
+static DEFINE_MUTEX(zpci_add_remove_lock);
static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);
@@ -69,6 +71,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb);
struct airq_iv *zpci_aif_sbv;
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+void zpci_zdev_put(struct zpci_dev *zdev)
+{
+ if (!zdev)
+ return;
+ mutex_lock(&zpci_add_remove_lock);
+ kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock);
+ mutex_unlock(&zpci_add_remove_lock);
+}
+
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -124,14 +135,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
struct zpci_fib fib = {0};
u8 cc;
- WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
/* Work around off by one in ISM virt device */
if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
fib.pal = limit + (1 << 12);
else
fib.pal = limit;
- fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+ fib.iota = iota;
fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, status);
if (cc)
@@ -255,7 +265,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
}
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
- unsigned long prot)
+ pgprot_t prot)
{
/*
* When PCI MIO instructions are unavailable the "physical" address
@@ -265,7 +275,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
if (!static_branch_unlikely(&have_mio))
return (void __iomem *)phys_addr;
- return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
+ return generic_ioremap_prot(phys_addr, size, prot);
}
EXPORT_SYMBOL(ioremap_prot);
@@ -690,6 +700,23 @@ int zpci_enable_device(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(zpci_enable_device);
+int zpci_reenable_device(struct zpci_dev *zdev)
+{
+ u8 status;
+ int rc;
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ return rc;
+
+ rc = zpci_iommu_register_ioat(zdev, &status);
+ if (rc)
+ zpci_disable_device(zdev);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_reenable_device);
+
int zpci_disable_device(struct zpci_dev *zdev)
{
u32 fh = zdev->fh;
@@ -739,7 +766,6 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
*/
int zpci_hot_reset_device(struct zpci_dev *zdev)
{
- u8 status;
int rc;
lockdep_assert_held(&zdev->state_lock);
@@ -758,19 +784,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
return rc;
}
- rc = zpci_enable_device(zdev);
- if (rc)
- return rc;
+ rc = zpci_reenable_device(zdev);
- if (zdev->dma_table)
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- if (rc) {
- zpci_disable_device(zdev);
- return rc;
- }
-
- return 0;
+ return rc;
}
/**
@@ -831,6 +847,7 @@ int zpci_add_device(struct zpci_dev *zdev)
{
int rc;
+ mutex_lock(&zpci_add_remove_lock);
zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state);
rc = zpci_init_iommu(zdev);
if (rc)
@@ -844,12 +861,14 @@ int zpci_add_device(struct zpci_dev *zdev)
spin_lock(&zpci_list_lock);
list_add_tail(&zdev->entry, &zpci_list);
spin_unlock(&zpci_list_lock);
+ mutex_unlock(&zpci_add_remove_lock);
return 0;
error_destroy_iommu:
zpci_destroy_iommu(zdev);
error:
zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc);
+ mutex_unlock(&zpci_add_remove_lock);
return rc;
}
@@ -919,21 +938,20 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
* @zdev: the zpci_dev that was reserved
*
* Handle the case that a given zPCI function was reserved by another system.
- * After a call to this function the zpci_dev can not be found via
- * get_zdev_by_fid() anymore but may still be accessible via existing
- * references though it will not be functional anymore.
*/
void zpci_device_reserved(struct zpci_dev *zdev)
{
- /*
- * Remove device from zpci_list as it is going away. This also
- * makes sure we ignore subsequent zPCI events for this device.
- */
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
+ lockdep_assert_held(&zdev->state_lock);
+ /* We may declare the device reserved multiple times */
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ return;
zdev->state = ZPCI_FN_STATE_RESERVED;
zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+ /*
+ * The underlying device is gone. Allow the zdev to be freed
+ * as soon as all other references are gone by accounting for
+ * the removal as a dropped reference.
+ */
zpci_zdev_put(zdev);
}
@@ -941,13 +959,14 @@ void zpci_release_device(struct kref *kref)
{
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
+ lockdep_assert_held(&zpci_add_remove_lock);
WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED);
-
- if (zdev->zbus->bus)
- zpci_bus_remove_device(zdev, false);
-
- if (zdev_enabled(zdev))
- zpci_disable_device(zdev);
+ /*
+ * We already hold zpci_list_lock thanks to kref_put_lock().
+ * This makes sure no new reference can be taken from the list.
+ */
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
if (zdev->has_hp_slot)
zpci_exit_slot(zdev);
@@ -1073,7 +1092,7 @@ char * __init pcibios_setup(char *str)
return NULL;
}
if (!strcmp(str, "nomio")) {
- get_lowcore()->machine_flags &= ~MACHINE_FLAG_PCI_MIO;
+ clear_machine_feature(MFEATURE_PCI_MIO);
return NULL;
}
if (!strcmp(str, "force_floating")) {
@@ -1148,7 +1167,7 @@ static int __init pci_base_init(void)
return 0;
}
- if (MACHINE_HAS_PCI_MIO) {
+ if (test_machine_feature(MFEATURE_PCI_MIO)) {
static_branch_enable(&have_mio);
system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT);
}
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index d5ace00d10f0..81bdb54ad5e3 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -19,6 +19,7 @@
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
+#include <linux/dma-direct.h>
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
@@ -171,7 +172,6 @@ void zpci_bus_scan_busses(void)
static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
{
return !s390_pci_no_rid && zdev->rid_available &&
- zpci_is_device_configured(zdev) &&
!zdev->vfn;
}
@@ -284,10 +284,32 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
return zbus;
}
+static void pci_dma_range_setup(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ u64 aligned_end, size;
+ dma_addr_t dma_start;
+ int ret;
+
+ dma_start = PAGE_ALIGN(zdev->start_dma);
+ aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1);
+ if (aligned_end >= dma_start)
+ size = aligned_end - dma_start;
+ else
+ size = 0;
+ WARN_ON_ONCE(size == 0);
+
+ ret = dma_direct_set_offset(&pdev->dev, 0, dma_start, size);
+ if (ret)
+ pr_err("Failed to allocate DMA range map for %s\n", pci_name(pdev));
+}
+
void pcibios_bus_add_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
+ pci_dma_range_setup(pdev);
+
/*
* With pdev->no_vf_scan the common PCI probing code does not
* perform PF/VF linking.
@@ -332,6 +354,20 @@ error:
return rc;
}
+static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ struct pci_dev *pdev;
+
+ if (!zdev->vfn)
+ return false;
+
+ pdev = zpci_iov_find_parent_pf(zbus, zdev);
+ if (!pdev)
+ return true;
+ pci_dev_put(pdev);
+ return false;
+}
+
int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
{
bool topo_is_tid = zdev->tid_avail;
@@ -346,6 +382,15 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
topo = topo_is_tid ? zdev->tid : zdev->pchid;
zbus = zpci_bus_get(topo, topo_is_tid);
+ /*
+ * An isolated VF gets its own domain/bus even if there exists
+ * a matching domain/bus already
+ */
+ if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) {
+ zpci_bus_put(zbus);
+ zbus = NULL;
+ }
+
if (!zbus) {
zbus = zpci_bus_alloc(topo, topo_is_tid);
if (!zbus)
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index e86a9419d233..ae3d7a9159bd 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -21,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev);
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
void zpci_release_device(struct kref *kref);
-static inline void zpci_zdev_put(struct zpci_dev *zdev)
-{
- if (zdev)
- kref_put(&zdev->kref, zpci_release_device);
-}
+
+void zpci_zdev_put(struct zpci_dev *zdev);
static inline void zpci_zdev_get(struct zpci_dev *zdev)
{
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 14bf7e8d06b7..241f7251c873 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -56,7 +56,7 @@ static inline int clp_get_ilp(unsigned long *ilp)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -79,7 +79,7 @@ static __always_inline int clp_req(void *data, unsigned int lps)
u64 ignored;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -112,6 +112,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->version = response->version;
zdev->maxstbl = response->maxstbl;
zdev->dtsm = response->dtsm;
+ zdev->rtr_avail = response->rtr;
switch (response->version) {
case 1:
@@ -427,6 +428,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
return;
}
zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state);
+ if (IS_ERR(zdev))
+ return;
list_add_tail(&zdev->entry, scan_list);
}
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 7f7b732b3f3e..d930416d4c90 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -16,6 +16,7 @@
#include <asm/sclp.h>
#include "pci_bus.h"
+#include "pci_report.h"
/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
@@ -53,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
case PCI_ERS_RESULT_NEED_RESET:
+ case PCI_ERS_RESULT_NONE:
return false;
default:
return true;
@@ -77,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver)
return false;
if (!driver->err_handler->error_detected)
return false;
- if (!driver->err_handler->slot_reset)
- return false;
- if (!driver->err_handler->resume)
- return false;
return true;
}
@@ -105,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
struct zpci_dev *zdev = to_zpci(pdev);
int rc;
+ /* The underlying device may have been disabled by the event */
+ if (!zdev_enabled(zdev))
+ return PCI_ERS_RESULT_NEED_RESET;
+
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
rc = zpci_reset_load_store_blocked(zdev);
if (rc) {
@@ -113,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
return PCI_ERS_RESULT_NEED_RESET;
}
- if (driver->err_handler->mmio_enabled) {
+ if (driver->err_handler->mmio_enabled)
ers_res = driver->err_handler->mmio_enabled(pdev);
- if (ers_result_indicates_abort(ers_res)) {
- pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
- pci_name(pdev));
- return ers_res;
- } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
- pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
- return ers_res;
- }
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+ pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
}
pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
@@ -149,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
return ers_res;
}
pdev->error_state = pci_channel_io_normal;
- ers_res = driver->err_handler->slot_reset(pdev);
+
+ if (driver->err_handler->slot_reset)
+ ers_res = driver->err_handler->slot_reset(pdev);
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
return ers_res;
@@ -169,6 +178,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
{
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ char *status_str = "success";
struct pci_driver *driver;
/*
@@ -186,37 +197,56 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (is_passed_through(pdev)) {
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
pci_name(pdev));
+ status_str = "failed (pass-through)";
goto out_unlock;
}
driver = to_pci_driver(pdev->dev.driver);
if (!is_driver_supported(driver)) {
- if (!driver)
+ if (!driver) {
pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
pci_name(pdev));
- else
+ status_str = "failed (no driver)";
+ } else {
pr_info("%s: The %s driver bound to the device does not support error recovery\n",
pci_name(pdev),
driver->name);
+ status_str = "failed (no driver support)";
+ }
goto out_unlock;
}
ers_res = zpci_event_notify_error_detected(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
+ if (ers_result_indicates_abort(ers_res)) {
+ status_str = "failed (abort on detection)";
goto out_unlock;
+ }
- if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
+ if (ers_result_indicates_abort(ers_res)) {
+ status_str = "failed (abort on MMIO enable)";
goto out_unlock;
+ }
}
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
ers_res = zpci_event_do_reset(pdev, driver);
+ /*
+ * ers_res can be PCI_ERS_RESULT_NONE either because the driver
+ * decided to return it, indicating that it abstains from voting
+ * on how to recover, or because it didn't implement the callback.
+ * Both cases assume, that if there is nothing else causing a
+ * disconnect, we recovered successfully.
+ */
+ if (ers_res == PCI_ERS_RESULT_NONE)
+ ers_res = PCI_ERS_RESULT_RECOVERED;
+
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
+ status_str = "failed (driver can't recover)";
goto out_unlock;
}
@@ -225,6 +255,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
driver->err_handler->resume(pdev);
out_unlock:
pci_dev_unlock(pdev);
+ zpci_report_status(zdev, "recovery", status_str);
return ers_res;
}
@@ -260,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
+ u32 fh = 0;
+ int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@@ -268,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (zdev) {
mutex_lock(&zdev->state_lock);
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (rc)
+ goto no_pdev;
+ if (!fh || ccdf->fh != fh) {
+ /* Ignore events with stale handles */
+ zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+ ccdf->fid, fh, ccdf->fh);
+ goto no_pdev;
+ }
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
@@ -322,6 +364,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
zdev->state = ZPCI_FN_STATE_STANDBY;
}
+static void zpci_event_reappear(struct zpci_dev *zdev)
+{
+ lockdep_assert_held(&zdev->state_lock);
+ /*
+ * The zdev is in the reserved state. This means that it was presumed to
+ * go away but there are still undropped references. Now, the platform
+ * announced its availability again. Bring back the lingering zdev
+ * to standby. This is safe because we hold a temporary reference
+ * now so that it won't go away. Account for the re-appearance of the
+ * underlying device by incrementing the reference count.
+ */
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+ zpci_zdev_get(zdev);
+ zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+}
+
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
@@ -345,8 +403,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
}
} else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
/* the configuration request may be stale */
- if (zdev->state != ZPCI_FN_STATE_STANDBY)
+ else if (zdev->state != ZPCI_FN_STATE_STANDBY)
break;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
}
@@ -362,6 +422,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
}
} else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
zpci_update_fh(zdev, ccdf->fh);
}
break;
diff --git a/arch/s390/pci/pci_fixup.c b/arch/s390/pci/pci_fixup.c
new file mode 100644
index 000000000000..35688b645098
--- /dev/null
+++ b/arch/s390/pci/pci_fixup.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Exceptions for specific devices,
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ */
+#include <linux/pci.h>
+
+static void zpci_ism_bar_no_mmap(struct pci_dev *pdev)
+{
+ /*
+ * ISM's BAR is special. Drivers written for ISM know
+ * how to handle this but others need to be aware of their
+ * special nature e.g. to prevent attempts to mmap() it.
+ */
+ pdev->non_mappable_bars = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM,
+ PCI_DEVICE_ID_IBM_ISM,
+ zpci_ism_bar_no_mmap);
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index f5a75ea7629a..eb978c8012be 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -160,7 +160,7 @@ static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
u64 __data;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rre,0xb9d20000,%[data],%[req_off]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -229,7 +229,7 @@ static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status)
u64 __data;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rre,0xb9d60000,%[data],%[ioaddr_len]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -267,7 +267,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rre,0xb9d00000,%[data],%[req_off]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -321,7 +321,7 @@ static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rre,0xb9d40000,%[data],%[ioaddr_len]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -356,7 +356,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -410,7 +410,7 @@ static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n"
"0: lhi %[exc],0\n"
"1:\n"
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
index ead062bf2b41..191e56a623f6 100644
--- a/arch/s390/pci/pci_iov.c
+++ b/arch/s390/pci/pci_iov.c
@@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in
return 0;
}
-int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+/**
+ * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function
+ * @zbus: The bus that the PCI function is on, or would be added on
+ * @zdev: The PCI function
+ *
+ * Finds the parent PF, if it exists and is configured, of the given PCI function
+ * and increments its refcount. Th PF is searched for on the provided bus so the
+ * caller has to ensure that this is the correct bus to search. This function may
+ * be used before adding the PCI function to a zbus.
+ *
+ * Return: Pointer to the struct pci_dev of the parent PF or NULL if it not
+ * found. If the function is not a VF or has no RequesterID information,
+ * NULL is returned as well.
+ */
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
- int i, cand_devfn;
- struct zpci_dev *zdev;
+ int i, vfid, devfn, cand_devfn;
struct pci_dev *pdev;
- int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
- int rc = 0;
if (!zbus->multifunction)
- return 0;
-
- /* If the parent PF for the given VF is also configured in the
+ return NULL;
+ /* Non-VFs and VFs without RID available don't have a parent */
+ if (!zdev->vfn || !zdev->rid_available)
+ return NULL;
+ /* Linux vfid starts at 0 vfn at 1 */
+ vfid = zdev->vfn - 1;
+ devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
+ /*
+ * If the parent PF for the given VF is also configured in the
* instance, it must be on the same zbus.
* We can then identify the parent PF by checking what
* devfn the VF would have if it belonged to that PF using the PF's
@@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn
if (!pdev)
continue;
cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
- if (cand_devfn == virtfn->devfn) {
- rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
- /* balance pci_get_slot() */
- pci_dev_put(pdev);
- break;
- }
+ if (cand_devfn == devfn)
+ return pdev;
/* balance pci_get_slot() */
pci_dev_put(pdev);
}
}
+ return NULL;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ struct zpci_dev *zdev = to_zpci(virtfn);
+ struct pci_dev *pdev_pf;
+ int rc = 0;
+
+ pdev_pf = zpci_iov_find_parent_pf(zbus, zdev);
+ if (pdev_pf) {
+ /* Linux' vfids start at 0 while zdev->vfn starts at 1 */
+ rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1);
+ pci_dev_put(pdev_pf);
+ }
return rc;
}
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
index e3fa4e77fc86..d2c2793eb0f3 100644
--- a/arch/s390/pci/pci_iov.h
+++ b/arch/s390/pci/pci_iov.h
@@ -19,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev);
int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev);
+
#else /* CONFIG_PCI_IOV */
static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
@@ -28,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v
{
return 0;
}
+
+static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ return NULL;
+}
#endif /* CONFIG_PCI_IOV */
#endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 46f99dc164ad..51e7a28af899 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -32,9 +32,11 @@ static inline int __pcistb_mio_inuser(
u64 len, u8 *status)
{
int cc, exception;
+ bool sacf_flag;
exception = 1;
- asm volatile (
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
" sacf 256\n"
"0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
"1: lhi %[exc],0\n"
@@ -44,6 +46,7 @@ static inline int __pcistb_mio_inuser(
: CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
: [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
: CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = len >> 24 & 0xff;
return exception ? -ENXIO : CC_TRANSFORM(cc);
}
@@ -54,6 +57,7 @@ static inline int __pcistg_mio_inuser(
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
int cc, exception;
+ bool sacf_flag;
u64 val = 0;
u64 cnt = ulen;
u8 tmp;
@@ -64,7 +68,8 @@ static inline int __pcistg_mio_inuser(
* address space. pcistg then uses the user mappings.
*/
exception = 1;
- asm volatile (
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
" sacf 256\n"
"0: llgc %[tmp],0(%[src])\n"
"4: sllg %[val],%[val],8\n"
@@ -81,6 +86,7 @@ static inline int __pcistg_mio_inuser(
CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
:
: CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = ioaddr_len.odd >> 24 & 0xff;
cc = exception ? -ENXIO : CC_TRANSFORM(cc);
@@ -175,8 +181,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
args.address = mmio_addr;
args.vma = vma;
ret = follow_pfnmap_start(&args);
- if (ret)
- goto out_unlock_mmap;
+ if (ret) {
+ fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
+ ret = follow_pfnmap_start(&args);
+ if (ret)
+ goto out_unlock_mmap;
+ }
io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
@@ -200,6 +210,7 @@ static inline int __pcilg_mio_inuser(
u64 ulen, u8 *status)
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+ bool sacf_flag;
u64 cnt = ulen;
int shift = ulen * 8;
int cc, exception;
@@ -211,7 +222,8 @@ static inline int __pcilg_mio_inuser(
* user address @dst
*/
exception = 1;
- asm volatile (
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
" sacf 256\n"
"0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
"1: lhi %[exc],0\n"
@@ -232,10 +244,10 @@ static inline int __pcilg_mio_inuser(
: [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception),
CC_OUT(cc, cc), [val] "=d" (val),
[dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
- [shift] "+d" (shift)
+ [shift] "+a" (shift)
:
: CC_CLOBBER_LIST("memory"));
-
+ disable_sacf_uaccess(sacf_flag);
cc = exception ? -ENXIO : CC_TRANSFORM(cc);
/* did we write everything to the user space buffer? */
if (!cc && cnt != 0)
@@ -315,14 +327,18 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
goto out_unlock_mmap;
ret = -EACCES;
- if (!(vma->vm_flags & VM_WRITE))
+ if (!(vma->vm_flags & VM_READ))
goto out_unlock_mmap;
args.vma = vma;
args.address = mmio_addr;
ret = follow_pfnmap_start(&args);
- if (ret)
- goto out_unlock_mmap;
+ if (ret) {
+ fixup_user_fault(current->mm, mmio_addr, 0, NULL);
+ ret = follow_pfnmap_start(&args);
+ if (ret)
+ goto out_unlock_mmap;
+ }
io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c
new file mode 100644
index 000000000000..1b494e5ecc4d
--- /dev/null
+++ b/arch/s390/pci/pci_report.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/sprintf.h>
+#include <linux/pci.h>
+
+#include <asm/sclp.h>
+#include <asm/debug.h>
+#include <asm/pci_debug.h>
+
+#include "pci_report.h"
+
+#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714
+
+struct zpci_report_error_data {
+ u64 timestamp;
+ u64 err_log_id;
+ char log_data[];
+} __packed;
+
+#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb))
+#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data))
+
+struct zpci_report_error {
+ struct zpci_report_error_header header;
+ struct zpci_report_error_data data;
+} __packed;
+
+static const char *zpci_state_str(pci_channel_state_t state)
+{
+ switch (state) {
+ case pci_channel_io_normal:
+ return "normal";
+ case pci_channel_io_frozen:
+ return "frozen";
+ case pci_channel_io_perm_failure:
+ return "permanent-failure";
+ default:
+ return "invalid";
+ };
+}
+
+static int debug_log_header_fn(debug_info_t *id, struct debug_view *view,
+ int area, debug_entry_t *entry, char *out_buf,
+ size_t out_buf_size)
+{
+ unsigned long sec, usec;
+ unsigned int level;
+ char *except_str;
+ int rc = 0;
+
+ level = entry->level;
+ sec = entry->clock;
+ usec = do_div(sec, USEC_PER_SEC);
+
+ if (entry->exception)
+ except_str = "*";
+ else
+ except_str = "-";
+ rc += scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ",
+ sec, usec, level, except_str,
+ entry->cpu);
+ return rc;
+}
+
+static int debug_prolog_header(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size)
+{
+ return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n");
+}
+
+static struct debug_view debug_log_view = {
+ "pci_msg_log",
+ &debug_prolog_header,
+ &debug_log_header_fn,
+ &debug_sprintf_format_fn,
+ NULL,
+ NULL
+};
+
+/**
+ * zpci_report_status - Report the status of operations on a PCI device
+ * @zdev: The PCI device for which to report status
+ * @operation: A string representing the operation reported
+ * @status: A string representing the status of the operation
+ *
+ * This function creates a human readable report about an operation such as
+ * PCI device recovery and forwards this to the platform using the SCLP Write
+ * Event Data mechanism. Besides the operation and status strings the report
+ * also contains additional information about the device deemed useful for
+ * debug such as the currently bound device driver, if any, and error state.
+ * Additionally a string representation of pci_debug_msg_id, or as much as fits,
+ * is also included.
+ *
+ * Return: 0 on success an error code < 0 otherwise.
+ */
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status)
+{
+ struct zpci_report_error *report;
+ struct pci_driver *driver = NULL;
+ struct pci_dev *pdev = NULL;
+ char *buf, *end;
+ int ret;
+
+ if (!zdev || !zdev->zbus)
+ return -ENODEV;
+
+ /* Protected virtualization hosts get nothing from us */
+ if (prot_virt_guest)
+ return -ENODATA;
+
+ report = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!report)
+ return -ENOMEM;
+ if (zdev->zbus->bus)
+ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+ if (pdev)
+ driver = to_pci_driver(pdev->dev.driver);
+
+ buf = report->data.log_data;
+ end = report->data.log_data + ZPCI_REPORT_DATA_SIZE;
+ buf += scnprintf(buf, end - buf, "report: %s\n", operation);
+ buf += scnprintf(buf, end - buf, "status: %s\n", status);
+ buf += scnprintf(buf, end - buf, "state: %s\n",
+ (pdev) ? zpci_state_str(pdev->error_state) : "n/a");
+ buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a");
+ ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true);
+ if (ret < 0)
+ pr_err("Reading PCI debug messages failed with code %d\n", ret);
+ else
+ buf += ret;
+
+ report->header.version = 1;
+ report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG;
+ report->header.length = buf - (char *)&report->data;
+ report->data.timestamp = ktime_get_clocktai_seconds();
+ report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT;
+
+ ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid);
+ if (ret)
+ pr_err("Reporting PCI status failed with code %d\n", ret);
+ else
+ pr_info("Reported PCI device status\n");
+
+ free_page((unsigned long)report);
+
+ return ret;
+}
diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h
new file mode 100644
index 000000000000..e08003d51a97
--- /dev/null
+++ b/arch/s390/pci/pci_report.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+#ifndef __S390_PCI_REPORT_H
+#define __S390_PCI_REPORT_H
+
+struct zpci_dev;
+
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status);
+
+#endif /* __S390_PCI_REPORT_H */
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 5f46ad58dcd1..0ecad08e1b1e 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -52,7 +52,6 @@ static DEVICE_ATTR_RO(mio_enabled);
static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
{
- u8 status;
int ret;
pci_stop_and_remove_bus_device(pdev);
@@ -70,16 +69,8 @@ static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
return ret;
}
- ret = zpci_enable_device(zdev);
- if (ret)
- return ret;
+ ret = zpci_reenable_device(zdev);
- if (zdev->dma_table) {
- ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- if (ret)
- zpci_disable_device(zdev);
- }
return ret;
}
@@ -135,7 +126,7 @@ out:
static DEVICE_ATTR_WO(recover);
static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -145,10 +136,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
return memory_read_from_buffer(buf, count, &off, zdev->util_str,
sizeof(zdev->util_str));
}
-static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+static const BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
struct zpci_report_error_header *report = (void *) buf;
@@ -164,7 +155,7 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
return ret ? ret : count;
}
-static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
+static const BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
static ssize_t uid_is_unique_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -203,7 +194,7 @@ const struct attribute_group zpci_ident_attr_group = {
.is_visible = zpci_index_is_visible,
};
-static struct bin_attribute *zpci_bin_attrs[] = {
+static const struct bin_attribute *const zpci_bin_attrs[] = {
&bin_attr_util_string,
&bin_attr_report_error,
NULL,
@@ -227,7 +218,7 @@ static struct attribute *zpci_dev_attrs[] = {
const struct attribute_group zpci_attr_group = {
.attrs = zpci_dev_attrs,
- .bin_attrs = zpci_bin_attrs,
+ .bin_attrs_new = zpci_bin_attrs,
};
static struct attribute *pfip_attrs[] = {