From f5f9e339f4d8d42a5a0e8f9a764653713d6c80b0 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 22 May 2025 14:13:12 +0200 Subject: s390/pci: Remove redundant bus removal and disable from zpci_release_device() commit d76f9633296785343d45f85199f4138cb724b6d2 upstream. Remove zpci_bus_remove_device() and zpci_disable_device() calls from zpci_release_device(). These calls were done when the device transitioned into the ZPCI_FN_STATE_STANDBY state which is guaranteed to happen before it enters the ZPCI_FN_STATE_RESERVED state. When zpci_release_device() is called the device is known to be in the ZPCI_FN_STATE_RESERVED state which is also checked by a WARN_ON(). Cc: stable@vger.kernel.org Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") Reviewed-by: Gerd Bayer Reviewed-by: Julian Ruess Tested-by: Gerd Bayer Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/s390/pci/pci.c') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 5bbdc4190b8b..9fcc6d3180f2 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -949,12 +949,6 @@ void zpci_release_device(struct kref *kref) WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); - if (zdev->zbus->bus) - zpci_bus_remove_device(zdev, false); - - if (zdev_enabled(zdev)) - zpci_disable_device(zdev); - if (zdev->has_hp_slot) zpci_exit_slot(zdev); -- cgit v1.2.3 From ea8faa3d225585a196d9603b05c361c98eb9aa5a Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 22 May 2025 14:13:14 +0200 Subject: s390/pci: Allow re-add of a reserved but not yet removed device commit 4b1815a52d7eb03b3e0e6742c6728bc16a4b2d1d upstream. The architecture assumes that PCI functions can be removed synchronously as PCI events are processed. This however clashes with the reference counting of struct pci_dev which allows device drivers to hold on to a struct pci_dev reference even as the underlying device is removed. To bridge this gap commit 2a671f77ee49 ("s390/pci: fix use after free of zpci_dev") keeps the struct zpci_dev in ZPCI_FN_STATE_RESERVED state until common code releases the struct pci_dev. Only when all references are dropped, the struct zpci_dev can be removed and freed. Later commit a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") moved the deletion of the struct zpci_dev from the zpci_list in zpci_release_device() to the point where the device is reserved. This was done to prevent handling events for a device that is already being removed, e.g. when the platform generates both PCI event codes 0x304 and 0x308. In retrospect, deletion from the zpci_list in the release function without holding the zpci_list_lock was also racy. A side effect of this handling is that if the underlying device re-appears while the struct zpci_dev is in the ZPCI_FN_STATE_RESERVED state, the new and old instances of the struct zpci_dev and/or struct pci_dev may clash. For example when trying to create the IOMMU sysfs files for the new instance. In this case, re-adding the new instance is aborted. The old instance is removed, and the device will remain absent until the platform issues another event. Fix this by allowing the struct zpci_dev to be brought back up right until it is finally removed. To this end also keep the struct zpci_dev in the zpci_list until it is finally released when all references have been dropped. Deletion from the zpci_list from within the release function is made safe by using kref_put_lock() with the zpci_list_lock. This ensures that the releasing code holds the last reference. Cc: stable@vger.kernel.org Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") Reviewed-by: Gerd Bayer Tested-by: Gerd Bayer Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci.c | 32 ++++++++++++++++++++++---------- arch/s390/pci/pci_bus.h | 7 ++----- arch/s390/pci/pci_event.c | 22 +++++++++++++++++++++- 3 files changed, 45 insertions(+), 16 deletions(-) (limited to 'arch/s390/pci/pci.c') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 9fcc6d3180f2..4602abd0c6f1 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -70,6 +70,13 @@ EXPORT_SYMBOL_GPL(zpci_aipb); struct airq_iv *zpci_aif_sbv; EXPORT_SYMBOL_GPL(zpci_aif_sbv); +void zpci_zdev_put(struct zpci_dev *zdev) +{ + if (!zdev) + return; + kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock); +} + struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; @@ -925,21 +932,20 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) * @zdev: the zpci_dev that was reserved * * Handle the case that a given zPCI function was reserved by another system. - * After a call to this function the zpci_dev can not be found via - * get_zdev_by_fid() anymore but may still be accessible via existing - * references though it will not be functional anymore. */ void zpci_device_reserved(struct zpci_dev *zdev) { - /* - * Remove device from zpci_list as it is going away. This also - * makes sure we ignore subsequent zPCI events for this device. - */ - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); + lockdep_assert_held(&zdev->state_lock); + /* We may declare the device reserved multiple times */ + if (zdev->state == ZPCI_FN_STATE_RESERVED) + return; zdev->state = ZPCI_FN_STATE_RESERVED; zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + /* + * The underlying device is gone. Allow the zdev to be freed + * as soon as all other references are gone by accounting for + * the removal as a dropped reference. + */ zpci_zdev_put(zdev); } @@ -948,6 +954,12 @@ void zpci_release_device(struct kref *kref) struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); + /* + * We already hold zpci_list_lock thanks to kref_put_lock(). + * This makes sure no new reference can be taken from the list. + */ + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); if (zdev->has_hp_slot) zpci_exit_slot(zdev); diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index e86a9419d233..ae3d7a9159bd 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -21,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev); void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); void zpci_release_device(struct kref *kref); -static inline void zpci_zdev_put(struct zpci_dev *zdev) -{ - if (zdev) - kref_put(&zdev->kref, zpci_release_device); -} + +void zpci_zdev_put(struct zpci_dev *zdev); static inline void zpci_zdev_get(struct zpci_dev *zdev) { diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 7bd7721c1239..2fbee3887d13 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -335,6 +335,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) zdev->state = ZPCI_FN_STATE_STANDBY; } +static void zpci_event_reappear(struct zpci_dev *zdev) +{ + lockdep_assert_held(&zdev->state_lock); + /* + * The zdev is in the reserved state. This means that it was presumed to + * go away but there are still undropped references. Now, the platform + * announced its availability again. Bring back the lingering zdev + * to standby. This is safe because we hold a temporary reference + * now so that it won't go away. Account for the re-appearance of the + * underlying device by incrementing the reference count. + */ + zdev->state = ZPCI_FN_STATE_STANDBY; + zpci_zdev_get(zdev); + zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh); +} + static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); @@ -358,8 +374,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); /* the configuration request may be stale */ - if (zdev->state != ZPCI_FN_STATE_STANDBY) + else if (zdev->state != ZPCI_FN_STATE_STANDBY) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; } @@ -375,6 +393,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); zpci_update_fh(zdev, ccdf->fh); } break; -- cgit v1.2.3 From 9c3e9d11424b5c4d2e349478f23465c153c6f98c Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 22 May 2025 14:13:15 +0200 Subject: s390/pci: Serialize device addition and removal commit 774a1fa880bc949d88b5ddec9494a13be733dfa8 upstream. Prior changes ensured that when zpci_release_device() is called and it removed the zdev from the zpci_list this instance can not be found via the zpci_list anymore even while allowing re-add of reserved devices. This only accounts for the overall lifetime and zpci_list addition and removal, it does not yet prevent concurrent add of a new instance for the same underlying device. Such concurrent add would subsequently cause issues such as attempted re-use of the same IOMMU sysfs directory and is generally undesired. Introduce a new zpci_add_remove_lock mutex to serialize adding a new device with removal. Together this ensures that if a struct zpci_dev is not found in the zpci_list it was either already removed and torn down, or its removal and tear down is in progress with the zpci_add_remove_lock held. Cc: stable@vger.kernel.org Fixes: a46044a92add ("s390/pci: fix zpci_zdev_put() on reserve") Reviewed-by: Gerd Bayer Tested-by: Gerd Bayer Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/s390/pci/pci.c') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 4602abd0c6f1..cd6676c2d602 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -45,6 +45,7 @@ /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); +static DEFINE_MUTEX(zpci_add_remove_lock); static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); @@ -74,7 +75,9 @@ void zpci_zdev_put(struct zpci_dev *zdev) { if (!zdev) return; + mutex_lock(&zpci_add_remove_lock); kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); } struct zpci_dev *get_zdev_by_fid(u32 fid) @@ -844,6 +847,7 @@ int zpci_add_device(struct zpci_dev *zdev) { int rc; + mutex_lock(&zpci_add_remove_lock); zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state); rc = zpci_init_iommu(zdev); if (rc) @@ -857,12 +861,14 @@ int zpci_add_device(struct zpci_dev *zdev) spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); spin_unlock(&zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); return 0; error_destroy_iommu: zpci_destroy_iommu(zdev); error: zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc); + mutex_unlock(&zpci_add_remove_lock); return rc; } @@ -953,6 +959,7 @@ void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + lockdep_assert_held(&zpci_add_remove_lock); WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); /* * We already hold zpci_list_lock thanks to kref_put_lock(). -- cgit v1.2.3