summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Documentation/ABI/testing/sysfs-driver-ufs | 2
-rw-r--r-- Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml | 3
-rw-r--r-- Documentation/networking/tls.rst | 4
-rw-r--r-- Documentation/process/maintainer-netdev.rst | 2
-rw-r--r-- MAINTAINERS | 10
-rw-r--r-- arch/s390/pci/pci_event.c | 59
-rw-r--r-- drivers/block/brd.c | 6
-rw-r--r-- drivers/block/ublk_drv.c | 11
-rw-r--r-- drivers/infiniband/core/cache.c | 4
-rw-r--r-- drivers/infiniband/core/umem_odp.c | 11
-rw-r--r-- drivers/infiniband/hw/mlx5/counters.c | 4
-rw-r--r-- drivers/infiniband/hw/mlx5/devx.c | 10
-rw-r--r-- drivers/infiniband/hw/mlx5/main.c | 33
-rw-r--r-- drivers/infiniband/hw/mlx5/mr.c | 61
-rw-r--r-- drivers/infiniband/hw/mlx5/odp.c | 8
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.c | 5
-rw-r--r-- drivers/iommu/intel/cache.c | 5
-rw-r--r-- drivers/iommu/intel/iommu.c | 11
-rw-r--r-- drivers/iommu/intel/iommu.h | 2
-rw-r--r-- drivers/iommu/rockchip-iommu.c | 3
-rw-r--r-- drivers/mfd/88pm860x-core.c | 3
-rw-r--r-- drivers/mfd/max8925-core.c | 6
-rw-r--r-- drivers/mfd/twl4030-irq.c | 3
-rw-r--r-- drivers/mmc/core/quirks.h | 12
-rw-r--r-- drivers/mmc/core/sd_uhs2.c | 4
-rw-r--r-- drivers/mmc/host/mtk-sd.c | 21
-rw-r--r-- drivers/mmc/host/sdhci-of-k1.c | 3
-rw-r--r-- drivers/mmc/host/sdhci-uhs2.c | 20
-rw-r--r-- drivers/mmc/host/sdhci.c | 9
-rw-r--r-- drivers/mmc/host/sdhci.h | 16
-rw-r--r-- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 2
-rw-r--r-- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 13
-rw-r--r-- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 24
-rw-r--r-- drivers/net/ethernet/amd/xgbe/xgbe.h | 4
-rw-r--r-- drivers/net/ethernet/atheros/atlx/atl1.c | 79
-rw-r--r-- drivers/net/ethernet/cisco/enic/enic_main.c | 4
-rw-r--r-- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 26
-rw-r--r-- drivers/net/ethernet/intel/idpf/idpf_controlq.c | 23
-rw-r--r-- drivers/net/ethernet/intel/idpf/idpf_controlq_api.h | 2
-rw-r--r-- drivers/net/ethernet/intel/idpf/idpf_ethtool.c | 4
-rw-r--r-- drivers/net/ethernet/intel/idpf/idpf_lib.c | 12
-rw-r--r-- drivers/net/ethernet/intel/igc/igc_main.c | 10
-rw-r--r-- drivers/net/ethernet/sun/niu.c | 31
-rw-r--r-- drivers/net/ethernet/sun/niu.h | 4
-rw-r--r-- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 27
-rw-r--r-- drivers/net/ethernet/wangxun/libwx/wx_sriov.c | 4
-rw-r--r-- drivers/net/ethernet/wangxun/libwx/wx_type.h | 3
-rw-r--r-- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 4
-rw-r--r-- drivers/net/ethernet/wangxun/ngbe/ngbe_type.h | 2
-rw-r--r-- drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c | 1
-rw-r--r-- drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c | 8
-rw-r--r-- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 22
-rw-r--r-- drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 4
-rw-r--r-- drivers/net/usb/lan78xx.c | 2
-rw-r--r-- drivers/net/virtio_net.c | 111
-rw-r--r-- drivers/nvme/host/core.c | 18
-rw-r--r-- drivers/nvme/host/multipath.c | 8
-rw-r--r-- drivers/nvme/host/pci.c | 6
-rw-r--r-- drivers/nvme/target/nvmet.h | 2
-rw-r--r-- drivers/rtc/rtc-cmos.c | 10
-rw-r--r-- drivers/rtc/rtc-pcf2127.c | 7
-rw-r--r-- drivers/rtc/rtc-s5m.c | 197
-rw-r--r-- drivers/scsi/hosts.c | 18
-rw-r--r-- drivers/scsi/qla2xxx/qla_mbx.c | 2
-rw-r--r-- drivers/scsi/qla4xxx/ql4_os.c | 2
-rw-r--r-- drivers/scsi/sd.c | 2
-rw-r--r-- drivers/ufs/core/ufs-sysfs.c | 4
-rw-r--r-- drivers/virtio/virtio_ring.c | 8
-rw-r--r-- fs/anon_inodes.c | 23
-rw-r--r-- fs/bcachefs/bcachefs.h | 11
-rw-r--r-- fs/bcachefs/btree_io.c | 43
-rw-r--r-- fs/bcachefs/btree_iter.c | 2
-rw-r--r-- fs/bcachefs/dirent.c | 19
-rw-r--r-- fs/bcachefs/dirent.h | 3
-rw-r--r-- fs/bcachefs/fs.c | 7
-rw-r--r-- fs/bcachefs/fsck.c | 4
-rw-r--r-- fs/bcachefs/inode.c | 13
-rw-r--r-- fs/bcachefs/opts.h | 5
-rw-r--r-- fs/bcachefs/sb-errors_format.h | 2
-rw-r--r-- fs/bcachefs/str_hash.c | 5
-rw-r--r-- fs/bcachefs/str_hash.h | 2
-rw-r--r-- fs/bcachefs/super.c | 31
-rw-r--r-- fs/btrfs/block-group.h | 2
-rw-r--r-- fs/btrfs/free-space-tree.c | 40
-rw-r--r-- fs/btrfs/inode.c | 36
-rw-r--r-- fs/btrfs/ioctl.c | 4
-rw-r--r-- fs/btrfs/tree-log.c | 137
-rw-r--r-- fs/eventpoll.c | 458
-rw-r--r-- fs/exec.c | 9
-rw-r--r-- fs/fuse/file.c | 5
-rw-r--r-- fs/libfs.c | 8
-rw-r--r-- fs/namei.c | 2
-rw-r--r-- fs/netfs/buffered_write.c | 38
-rw-r--r-- fs/netfs/direct_write.c | 16
-rw-r--r-- fs/netfs/internal.h | 26
-rw-r--r-- fs/netfs/main.c | 6
-rw-r--r-- fs/netfs/misc.c | 50
-rw-r--r-- fs/netfs/read_collect.c | 16
-rw-r--r-- fs/netfs/write_collect.c | 14
-rw-r--r-- fs/netfs/write_retry.c | 3
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c | 118
-rw-r--r-- fs/nfs/inode.c | 17
-rw-r--r-- fs/nfs/pnfs.c | 4
-rw-r--r-- fs/smb/client/cifssmb.c | 22
-rw-r--r-- fs/smb/client/smb2pdu.c | 27
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.c | 41
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc.c | 31
-rw-r--r-- fs/xfs/xfs_buf.c | 38
-rw-r--r-- fs/xfs/xfs_buf.h | 1
-rw-r--r-- fs/xfs/xfs_buf_item.c | 295
-rw-r--r-- fs/xfs/xfs_buf_item.h | 3
-rw-r--r-- fs/xfs/xfs_dquot.c | 4
-rw-r--r-- fs/xfs/xfs_file.c | 7
-rw-r--r-- fs/xfs/xfs_icache.c | 8
-rw-r--r-- fs/xfs/xfs_inode.c | 2
-rw-r--r-- fs/xfs/xfs_inode_item.c | 5
-rw-r--r-- fs/xfs/xfs_log_cil.c | 4
-rw-r--r-- fs/xfs/xfs_mru_cache.c | 19
-rw-r--r-- fs/xfs/xfs_qm.c | 86
-rw-r--r-- fs/xfs/xfs_rtalloc.c | 2
-rw-r--r-- fs/xfs/xfs_super.c | 5
-rw-r--r-- fs/xfs/xfs_trace.h | 10
-rw-r--r-- fs/xfs/xfs_trans.c | 4
-rw-r--r-- fs/xfs/xfs_zone_alloc.c | 42
-rw-r--r-- include/linux/fs.h | 2
-rw-r--r-- include/linux/netfs.h | 21
-rw-r--r-- include/trace/events/netfs.h | 29
-rw-r--r-- io_uring/io_uring.c | 3
-rw-r--r-- lib/test_objagg.c | 4
-rw-r--r-- mm/secretmem.c | 9
-rw-r--r-- net/bluetooth/hci_event.c | 36
-rw-r--r-- net/bluetooth/hci_sync.c | 227
-rw-r--r-- net/bluetooth/mgmt.c | 25
-rw-r--r-- net/ipv4/ip_input.c | 7
-rw-r--r-- net/rose/rose_route.c | 15
-rw-r--r-- net/sched/sch_api.c | 19
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c | 2
-rw-r--r-- net/vmw_vsock/vmci_transport.c | 4
-rw-r--r-- tools/testing/selftests/coredump/stackdump_test.c | 5
-rw-r--r-- tools/testing/selftests/iommu/iommufd.c | 40
-rw-r--r-- tools/testing/selftests/iommu/iommufd_utils.h | 9
141 files changed, 1957 insertions, 1396 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs
index d4140dc6c5ba..615453fcc9ff 100644
--- a/Documentation/ABI/testing/sysfs-driver-ufs
+++ b/Documentation/ABI/testing/sysfs-driver-ufs
@@ -711,7 +711,7 @@ Description: This file shows the thin provisioning type. This is one of
The file is read only.
-What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count
+What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the total physical memory resources. This is
diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
index 4dd2dc9c678b..8afbd9ebd73f 100644
--- a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
@@ -80,6 +80,8 @@ examples:
interrupt-parent = <&intc>;
interrupts = <296 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "macirq";
+ phy-handle = <&phy0>;
+ phy-mode = "rgmii-id";
resets = <&rst 30>;
reset-names = "stmmaceth";
snps,multicast-filter-bins = <0>;
@@ -91,7 +93,6 @@ examples:
snps,mtl-rx-config = <&gmac0_mtl_rx_setup>;
snps,mtl-tx-config = <&gmac0_mtl_tx_setup>;
snps,axi-config = <&gmac0_stmmac_axi_setup>;
- status = "disabled";
gmac0_mtl_rx_setup: rx-queues-config {
snps,rx-queues-to-use = <8>;
diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst
index c7904a1bc167..36cc7afc2527 100644
--- a/Documentation/networking/tls.rst
+++ b/Documentation/networking/tls.rst
@@ -16,11 +16,13 @@ User interface
Creating a TLS connection
-------------------------
-First create a new TCP socket and set the TLS ULP.
+First create a new TCP socket and once the connection is established set the
+TLS ULP.
.. code-block:: c
sock = socket(AF_INET, SOCK_STREAM, 0);
+ connect(sock, addr, addrlen);
setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls"));
Setting the TLS ULP allows us to set/get TLS socket options. Currently
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index 1ac62dc3a66f..e1755610b4bc 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -312,7 +312,7 @@ Posting as one thread is discouraged because it confuses patchwork
(as of patchwork 2.2.2).
Co-posting selftests
---------------------
+~~~~~~~~~~~~~~~~~~~~
Selftests should be part of the same series as the code changes.
Specifically for fixes both code change and related test should go into
diff --git a/MAINTAINERS b/MAINTAINERS
index 4bac4ea21b64..fad6cb025a19 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15550,6 +15550,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_*
MELLANOX ETHERNET DRIVER (mlx5e)
M: Saeed Mahameed <saeedm@nvidia.com>
M: Tariq Toukan <tariqt@nvidia.com>
+M: Mark Bloch <mbloch@nvidia.com>
L: netdev@vger.kernel.org
S: Maintained
W: https://www.nvidia.com/networking/
@@ -15619,6 +15620,7 @@ MELLANOX MLX5 core VPI driver
M: Saeed Mahameed <saeedm@nvidia.com>
M: Leon Romanovsky <leonro@nvidia.com>
M: Tariq Toukan <tariqt@nvidia.com>
+M: Mark Bloch <mbloch@nvidia.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
S: Maintained
@@ -21198,7 +21200,7 @@ M: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
L: netdev@vger.kernel.org
L: linux-renesas-soc@vger.kernel.org
S: Maintained
-F: Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml
+F: Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml
F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c
RENESAS RZ/V2H(P) USB2PHY PORT RESET DRIVER
@@ -22586,9 +22588,11 @@ S: Maintained
F: drivers/misc/sgi-xp/
SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
+M: D. Wythe <alibuda@linux.alibaba.com>
+M: Dust Li <dust.li@linux.alibaba.com>
+M: Sidraya Jayagond <sidraya@linux.ibm.com>
M: Wenjia Zhang <wenjia@linux.ibm.com>
-M: Jan Karcher <jaka@linux.ibm.com>
-R: D. Wythe <alibuda@linux.alibaba.com>
+R: Mahanta Jambigi <mjambigi@linux.ibm.com>
R: Tony Lu <tonylu@linux.alibaba.com>
R: Wen Gu <guwen@linux.alibaba.com>
L: linux-rdma@vger.kernel.org
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 2fbee3887d13..d930416d4c90 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
case PCI_ERS_RESULT_NEED_RESET:
+ case PCI_ERS_RESULT_NONE:
return false;
default:
return true;
@@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver)
return false;
if (!driver->err_handler->error_detected)
return false;
- if (!driver->err_handler->slot_reset)
- return false;
- if (!driver->err_handler->resume)
- return false;
return true;
}
@@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
struct zpci_dev *zdev = to_zpci(pdev);
int rc;
+ /* The underlying device may have been disabled by the event */
+ if (!zdev_enabled(zdev))
+ return PCI_ERS_RESULT_NEED_RESET;
+
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
rc = zpci_reset_load_store_blocked(zdev);
if (rc) {
@@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
return PCI_ERS_RESULT_NEED_RESET;
}
- if (driver->err_handler->mmio_enabled) {
+ if (driver->err_handler->mmio_enabled)
ers_res = driver->err_handler->mmio_enabled(pdev);
- if (ers_result_indicates_abort(ers_res)) {
- pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
- pci_name(pdev));
- return ers_res;
- } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
- pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
- return ers_res;
- }
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+ pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
}
pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
@@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
return ers_res;
}
pdev->error_state = pci_channel_io_normal;
- ers_res = driver->err_handler->slot_reset(pdev);
+
+ if (driver->err_handler->slot_reset)
+ ers_res = driver->err_handler->slot_reset(pdev);
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
return ers_res;
@@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
goto out_unlock;
}
- if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
if (ers_result_indicates_abort(ers_res)) {
status_str = "failed (abort on MMIO enable)";
@@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
ers_res = zpci_event_do_reset(pdev, driver);
+ /*
+ * ers_res can be PCI_ERS_RESULT_NONE either because the driver
+ * decided to return it, indicating that it abstains from voting
+ * on how to recover, or because it didn't implement the callback.
+ * Both cases assume, that if there is nothing else causing a
+ * disconnect, we recovered successfully.
+ */
+ if (ers_res == PCI_ERS_RESULT_NONE)
+ ers_res = PCI_ERS_RESULT_RECOVERED;
+
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
@@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
+ u32 fh = 0;
+ int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (zdev) {
mutex_lock(&zdev->state_lock);
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (rc)
+ goto no_pdev;
+ if (!fh || ccdf->fh != fh) {
+ /* Ignore events with stale handles */
+ zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+ ccdf->fid, fh, ccdf->fh);
+ goto no_pdev;
+ }
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index b1be6c510372..0c2eabe14af3 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -64,13 +64,15 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
rcu_read_unlock();
page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
- rcu_read_lock();
- if (!page)
+ if (!page) {
+ rcu_read_lock();
return ERR_PTR(-ENOMEM);
+ }
xa_lock(&brd->brd_pages);
ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
page, gfp);
+ rcu_read_lock();
if (ret) {
xa_unlock(&brd->brd_pages);
__free_page(page);
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index c3e3c3b65a6d..9fd284fa76dc 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1442,15 +1442,16 @@ static void ublk_queue_rqs(struct rq_list *rqlist)
struct ublk_queue *this_q = req->mq_hctx->driver_data;
struct ublk_io *this_io = &this_q->ios[req->tag];
+ if (ublk_prep_req(this_q, req, true) != BLK_STS_OK) {
+ rq_list_add_tail(&requeue_list, req);
+ continue;
+ }
+
if (io && !ublk_belong_to_same_batch(io, this_io) &&
!rq_list_empty(&submit_list))
ublk_queue_cmd_list(io, &submit_list);
io = this_io;
-
- if (ublk_prep_req(this_q, req, true) == BLK_STS_OK)
- rq_list_add_tail(&submit_list, req);
- else
- rq_list_add_tail(&requeue_list, req);
+ rq_list_add_tail(&submit_list, req);
}
if (!rq_list_empty(&submit_list))
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 9979a351577f..81cf3c902e81 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -582,8 +582,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
out_unlock:
mutex_unlock(&table->lock);
if (ret)
- pr_warn("%s: unable to add gid %pI6 error=%d\n",
- __func__, gid->raw, ret);
+ pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
return ret;
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index c752ae9fad6c..b1c44ec1a3f3 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -76,6 +76,17 @@ static int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
end = ALIGN(end, page_size);
if (unlikely(end < page_size))
return -EOVERFLOW;
+ /*
+ * The mmu notifier can be called within reclaim contexts and takes the
+ * umem_mutex. This is rare to trigger in testing, teach lockdep about
+ * it.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ mutex_lock(&umem_odp->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
+ fs_reclaim_release(GFP_KERNEL);
+ }
nr_entries = (end - start) >> PAGE_SHIFT;
if (!(nr_entries * PAGE_SIZE / page_size))
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index b847084dcd99..a506fafd2b15 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -398,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
return ret;
/* We don't expose device counters over Vports */
- if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
goto done;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
@@ -418,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
*/
goto done;
}
- ret = mlx5_lag_query_cong_counters(dev->mdev,
+ ret = mlx5_lag_query_cong_counters(mdev,
stats->value +
cnts->num_q_counters,
cnts->num_cong_counters,
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 2479da8620ca..843dcd312242 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1958,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
/* Level1 is valid for future use, no need to free */
return -ENOMEM;
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
err = xa_insert(&event->object_ids,
key_level2,
obj_event,
@@ -1966,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
kfree(obj_event);
return err;
}
- INIT_LIST_HEAD(&obj_event->obj_sub_list);
}
return 0;
@@ -2669,7 +2669,7 @@ static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd)
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
{
- struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS];
+ struct mlx5_async_cmd *async_cmd;
struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *device = ucontext->device;
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -2678,6 +2678,10 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
int head = 0;
int tail = 0;
+ async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL);
+ if (!async_cmd)
+ return;
+
list_for_each_entry(uobject, &ufile->uobjects, list) {
WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE));
@@ -2713,6 +2717,8 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
head++;
}
+
+ kfree(async_cmd);
}
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ce7610740412..df6557ddbdfc 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1791,6 +1791,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
context->devx_uid);
}
+static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ int err;
+
+ err = mlx5_nic_vport_update_local_lb(master, true);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_update_local_lb(slave, true);
+ if (err)
+ goto out;
+
+ return 0;
+
+out:
+ mlx5_nic_vport_update_local_lb(master, false);
+ return err;
+}
+
+static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ mlx5_nic_vport_update_local_lb(slave, false);
+ mlx5_nic_vport_update_local_lb(master, false);
+}
+
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
int err = 0;
@@ -3495,6 +3522,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
lockdep_assert_held(&mlx5_ib_multiport_mutex);
+ mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev);
+
mlx5_core_mp_event_replay(ibdev->mdev,
MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
NULL);
@@ -3590,6 +3619,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
MLX5_DRIVER_EVENT_AFFILIATION_DONE,
&key);
+ err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev);
+ if (err)
+ goto unbind;
+
return true;
unbind:
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 57f9bc2a4a3a..bd35e75d9ce5 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -2027,23 +2027,50 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
+static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
- bool is_odp = is_odp_mr(mr);
bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
- !to_ib_umem_dmabuf(mr->umem)->pinned;
- bool from_cache = !!ent;
- int ret = 0;
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ bool is_odp = is_odp_mr(mr);
+ int ret;
if (is_odp)
mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
if (is_odp_dma_buf)
- dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL);
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+
+ ret = mlx5r_umr_revoke_mr(mr);
+
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
- if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
+ return ret;
+}
+
+static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
+{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+ bool is_odp = is_odp_mr(mr);
+ bool from_cache = !!ent;
+ int ret;
+
+ if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
+ !cache_ent_find_and_store(dev, mr)) {
ent = mr->mmkey.cache_ent;
/* upon storing to a clean temp entry - schedule its cleanup */
spin_lock_irq(&ent->mkeys_queue.lock);
@@ -2055,7 +2082,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
ent->tmp_cleanup_scheduled = true;
}
spin_unlock_irq(&ent->mkeys_queue.lock);
- goto out;
+ return 0;
}
if (ent) {
@@ -2064,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
mr->mmkey.cache_ent = NULL;
spin_unlock_irq(&ent->mkeys_queue.lock);
}
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
ret = destroy_mkey(dev, mr);
-out:
if (is_odp) {
if (!ret)
to_ib_umem_odp(mr->umem)->private = NULL;
@@ -2075,9 +2108,9 @@ out:
if (is_odp_dma_buf) {
if (!ret)
to_ib_umem_dmabuf(mr->umem)->private = NULL;
- dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
}
-
return ret;
}
@@ -2126,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
}
/* Stop DMA */
- rc = mlx5_revoke_mr(mr);
+ rc = mlx5r_handle_mkey_cleanup(mr);
if (rc)
return rc;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index eaa2f9f5f3a9..f6abd64f07f7 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -259,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
}
if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
- __xa_erase(&mr_to_mdev(mr)->odp_mkeys,
- mlx5_base_mkey(mr->mmkey.key));
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key));
xa_unlock(&imr->implicit_children);
/* Freeing a MR is a sleeping operation, so bounce to a work queue */
@@ -532,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
}
if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
- ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey, GFP_KERNEL);
+ ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL);
if (xa_is_err(ret)) {
ret = ERR_PTR(xa_err(ret));
__xa_erase(&imr->implicit_children, idx);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 1378651735f6..23ed2fc688f0 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3705,9 +3705,10 @@ static ssize_t add_target_store(struct device *dev,
target_host->max_id = 1;
target_host->max_lun = -1LL;
target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
- target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
- if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
+ if (ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
+ target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
+ else
target_host->virt_boundary_mask = ~srp_dev->mr_page_mask;
target = host_to_target(target_host);
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index fc35cba59145..47692cbfaabd 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -40,9 +40,8 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
}
/* Assign a cache tag with specified type to domain. */
-static int cache_tag_assign(struct dmar_domain *domain, u16 did,
- struct device *dev, ioasid_t pasid,
- enum cache_tag_type type)
+int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
+ ioasid_t pasid, enum cache_tag_type type)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7aa3932251b2..148b944143b8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3780,8 +3780,17 @@ static void intel_iommu_probe_finalize(struct device *dev)
!pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
info->pasid_enabled = 1;
- if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev))
+ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
iommu_enable_pci_ats(info);
+ /* Assign a DEVTLB cache tag to the default domain. */
+ if (info->ats_enabled && info->domain) {
+ u16 did = domain_id_iommu(info->domain, iommu);
+
+ if (cache_tag_assign(info->domain, did, dev,
+ IOMMU_NO_PASID, CACHE_TAG_DEVTLB))
+ iommu_disable_pci_ats(info);
+ }
+ }
iommu_enable_pci_pri(info);
}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 3ddbcc603de2..2d1afab5eedc 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -1289,6 +1289,8 @@ struct cache_tag {
unsigned int users;
};
+int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
+ ioasid_t pasid, enum cache_tag_type type);
int cache_tag_assign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid);
void cache_tag_unassign_domain(struct dmar_domain *domain,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 22f74ba33a0e..e6bb3c784017 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1157,7 +1157,6 @@ static int rk_iommu_of_xlate(struct device *dev,
return -ENOMEM;
data->iommu = platform_get_drvdata(iommu_dev);
- data->iommu->domain = &rk_identity_domain;
dev_iommu_priv_set(dev, data);
platform_device_put(iommu_dev);
@@ -1195,6 +1194,8 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (!iommu)
return -ENOMEM;
+ iommu->domain = &rk_identity_domain;
+
platform_set_drvdata(pdev, iommu);
iommu->dev = dev;
iommu->num_mmu = 0;
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 488e346047c1..77230fbe07be 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -573,7 +573,6 @@ static int device_irq_init(struct pm860x_chip *chip,
unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
int data, mask, ret = -EINVAL;
int nr_irqs, irq_base = -1;
- struct device_node *node = i2c->dev.of_node;
mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR
| PM8607_B0_MISC1_INT_MASK;
@@ -624,7 +623,7 @@ static int device_irq_init(struct pm860x_chip *chip,
ret = -EBUSY;
goto out;
}
- irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, chip->irq_base, 0,
+ irq_domain_create_legacy(dev_fwnode(&i2c->dev), nr_irqs, chip->irq_base, 0,
&pm860x_irq_domain_ops, chip);
chip->core_irq = i2c->irq;
if (!chip->core_irq)
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index 78b16c67a5fc..25377dcce60e 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -656,7 +656,6 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq,
{
unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
int ret;
- struct device_node *node = chip->dev->of_node;
/* clear all interrupts */
max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ1);
@@ -682,8 +681,9 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq,
return -EBUSY;
}
- irq_domain_create_legacy(of_fwnode_handle(node), MAX8925_NR_IRQS, chip->irq_base, 0,
- &max8925_irq_domain_ops, chip);
+ irq_domain_create_legacy(dev_fwnode(chip->dev), MAX8925_NR_IRQS,
+ chip->irq_base, 0, &max8925_irq_domain_ops,
+ chip);
/* request irq handler for pmic main irq*/
chip->core_irq = irq;
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 232c2bfe8c18..d3ab40651307 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -676,7 +676,6 @@ int twl4030_init_irq(struct device *dev, int irq_num)
static struct irq_chip twl4030_irq_chip;
int status, i;
int irq_base, irq_end, nr_irqs;
- struct device_node *node = dev->of_node;
/*
* TWL core and pwr interrupts must be contiguous because
@@ -691,7 +690,7 @@ int twl4030_init_irq(struct device *dev, int irq_num)
return irq_base;
}
- irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, irq_base, 0,
+ irq_domain_create_legacy(dev_fwnode(dev), nr_irqs, irq_base, 0,
&irq_domain_simple_ops, NULL);
irq_end = irq_base + TWL4030_CORE_NR_IRQS;
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index 7f893bafaa60..c417ed34c057 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -44,6 +44,12 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = {
0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd,
MMC_QUIRK_NO_UHS_DDR50_TUNING, EXT_CSD_REV_ANY),
+ /*
+ * Some SD cards reports discard support while they don't
+ */
+ MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
+ MMC_QUIRK_BROKEN_SD_DISCARD),
+
END_FIXUP
};
@@ -147,12 +153,6 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = {
MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc,
MMC_QUIRK_TRIM_BROKEN),
- /*
- * Some SD cards reports discard support while they don't
- */
- MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
- MMC_QUIRK_BROKEN_SD_DISCARD),
-
END_FIXUP
};
diff --git a/drivers/mmc/core/sd_uhs2.c b/drivers/mmc/core/sd_uhs2.c
index 1c31d0dfa961..de17d1611290 100644
--- a/drivers/mmc/core/sd_uhs2.c
+++ b/drivers/mmc/core/sd_uhs2.c
@@ -91,8 +91,8 @@ static int sd_uhs2_phy_init(struct mmc_host *host)
err = host->ops->uhs2_control(host, UHS2_PHY_INIT);
if (err) {
- pr_err("%s: failed to initial phy for UHS-II!\n",
- mmc_hostname(host));
+ pr_debug("%s: failed to initial phy for UHS-II!\n",
+ mmc_hostname(host));
}
return err;
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 31eb90536bce..d7020e06dd55 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -846,12 +846,18 @@ static inline void msdc_dma_setup(struct msdc_host *host, struct msdc_dma *dma,
static void msdc_prepare_data(struct msdc_host *host, struct mmc_data *data)
{
if (!(data->host_cookie & MSDC_PREPARE_FLAG)) {
- data->host_cookie |= MSDC_PREPARE_FLAG;
data->sg_count = dma_map_sg(host->dev, data->sg, data->sg_len,
mmc_get_dma_dir(data));
+ if (data->sg_count)
+ data->host_cookie |= MSDC_PREPARE_FLAG;
}
}
+static bool msdc_data_prepared(struct mmc_data *data)
+{
+ return data->host_cookie & MSDC_PREPARE_FLAG;
+}
+
static void msdc_unprepare_data(struct msdc_host *host, struct mmc_data *data)
{
if (data->host_cookie & MSDC_ASYNC_FLAG)
@@ -1483,8 +1489,19 @@ static void msdc_ops_request(struct mmc_host *mmc, struct mmc_request *mrq)
WARN_ON(!host->hsq_en && host->mrq);
host->mrq = mrq;
- if (mrq->data)
+ if (mrq->data) {
msdc_prepare_data(host, mrq->data);
+ if (!msdc_data_prepared(mrq->data)) {
+ host->mrq = NULL;
+ /*
+ * Failed to prepare DMA area, fail fast before
+ * starting any commands.
+ */
+ mrq->cmd->error = -ENOSPC;
+ mmc_request_done(mmc_from_priv(host), mrq);
+ return;
+ }
+ }
/* if SBC is required, we have HW option and SW option.
* if HW option is enabled, and SBC does not have "special" flags,
diff --git a/drivers/mmc/host/sdhci-of-k1.c b/drivers/mmc/host/sdhci-of-k1.c
index 6880d3e9ab62..2e5da7c5834c 100644
--- a/drivers/mmc/host/sdhci-of-k1.c
+++ b/drivers/mmc/host/sdhci-of-k1.c
@@ -276,7 +276,8 @@ static int spacemit_sdhci_probe(struct platform_device *pdev)
host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
- if (spacemit_sdhci_get_clocks(dev, pltfm_host))
+ ret = spacemit_sdhci_get_clocks(dev, pltfm_host);
+ if (ret)
goto err_pltfm;
ret = sdhci_add_host(host);
diff --git a/drivers/mmc/host/sdhci-uhs2.c b/drivers/mmc/host/sdhci-uhs2.c
index c53b64d50c0d..0efeb9d0c376 100644
--- a/drivers/mmc/host/sdhci-uhs2.c
+++ b/drivers/mmc/host/sdhci-uhs2.c
@@ -99,8 +99,8 @@ void sdhci_uhs2_reset(struct sdhci_host *host, u16 mask)
/* hw clears the bit when it's done */
if (read_poll_timeout_atomic(sdhci_readw, val, !(val & mask), 10,
UHS2_RESET_TIMEOUT_100MS, true, host, SDHCI_UHS2_SW_RESET)) {
- pr_warn("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__,
- mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc));
+ pr_debug("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__,
+ mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc));
sdhci_writeb(host, 0, SDHCI_UHS2_SW_RESET);
return;
}
@@ -335,8 +335,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host)
if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IF_DETECT),
100, UHS2_INTERFACE_DETECT_TIMEOUT_100MS, true,
host, SDHCI_PRESENT_STATE)) {
- pr_warn("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc));
- sdhci_dumpregs(host);
+ pr_debug("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc));
+ sdhci_dbg_dumpregs(host, "UHS2 interface detect timeout in 100ms");
return -EIO;
}
@@ -345,8 +345,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host)
if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_LANE_SYNC),
100, UHS2_LANE_SYNC_TIMEOUT_150MS, true, host, SDHCI_PRESENT_STATE)) {
- pr_warn("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc));
- sdhci_dumpregs(host);
+ pr_debug("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc));
+ sdhci_dbg_dumpregs(host, "UHS2 Lane sync fail in 150ms");
return -EIO;
}
@@ -417,12 +417,12 @@ static int sdhci_uhs2_do_detect_init(struct mmc_host *mmc)
host->ops->uhs2_pre_detect_init(host);
if (sdhci_uhs2_interface_detect(host)) {
- pr_warn("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc));
+ pr_debug("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc));
return -EIO;
}
if (sdhci_uhs2_init(host)) {
- pr_warn("%s: UHS2 init fail.\n", mmc_hostname(host->mmc));
+ pr_debug("%s: UHS2 init fail.\n", mmc_hostname(host->mmc));
return -EIO;
}
@@ -504,8 +504,8 @@ static int sdhci_uhs2_check_dormant(struct sdhci_host *host)
if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IN_DORMANT_STATE),
100, UHS2_CHECK_DORMANT_TIMEOUT_100MS, true, host,
SDHCI_PRESENT_STATE)) {
- pr_warn("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc));
- sdhci_dumpregs(host);
+ pr_debug("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc));
+ sdhci_dbg_dumpregs(host, "UHS2 IN_DORMANT fail in 100ms");
return -EIO;
}
return 0;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index f008167d1863..e116f2db34d5 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2065,15 +2065,10 @@ void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
host->mmc->actual_clock = 0;
- clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
- if (clk & SDHCI_CLOCK_CARD_EN)
- sdhci_writew(host, clk & ~SDHCI_CLOCK_CARD_EN,
- SDHCI_CLOCK_CONTROL);
+ sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
- if (clock == 0) {
- sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
+ if (clock == 0)
return;
- }
clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock);
sdhci_enable_clk(host, clk);
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index f9d65dd0f2b2..70ada1857a4c 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -900,4 +900,20 @@ void sdhci_switch_external_dma(struct sdhci_host *host, bool en);
void sdhci_set_data_timeout_irq(struct sdhci_host *host, bool enable);
void __sdhci_set_timeout(struct sdhci_host *host, struct mmc_command *cmd);
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+#define SDHCI_DBG_ANYWAY 0
+#elif defined(DEBUG)
+#define SDHCI_DBG_ANYWAY 1
+#else
+#define SDHCI_DBG_ANYWAY 0
+#endif
+
+#define sdhci_dbg_dumpregs(host, fmt) \
+do { \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(descriptor) || SDHCI_DBG_ANYWAY) \
+ sdhci_dumpregs(host); \
+} while (0)
+
#endif /* __SDHCI_HW_H */
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index e1296cbf4ff3..9316de4126cf 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -1269,6 +1269,8 @@
#define MDIO_VEND2_CTRL1_SS13 BIT(13)
#endif
+#define XGBE_VEND2_MAC_AUTO_SW BIT(9)
+
/* MDIO mask values */
#define XGBE_AN_CL73_INT_CMPLT BIT(0)
#define XGBE_AN_CL73_INC_LINK BIT(1)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 71449edbb76d..1a37ec45e650 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -266,6 +266,10 @@ static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable,
reg |= MDIO_VEND2_CTRL1_AN_RESTART;
XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg);
+
+ reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL);
+ reg |= XGBE_VEND2_MAC_AUTO_SW;
+ XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL, reg);
}
static void xgbe_an37_restart(struct xgbe_prv_data *pdata)
@@ -894,6 +898,11 @@ static void xgbe_an37_init(struct xgbe_prv_data *pdata)
netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n",
(pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII");
+
+ reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1);
+ reg &= ~MDIO_AN_CTRL1_ENABLE;
+ XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg);
+
}
static void xgbe_an73_init(struct xgbe_prv_data *pdata)
@@ -1295,6 +1304,10 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata,
&an_restart);
+ /* bail out if the link status register read fails */
+ if (pdata->phy.link < 0)
+ return;
+
if (an_restart) {
xgbe_phy_config_aneg(pdata);
goto adjust_link;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 7a4dfa4e19c7..23c39e92e783 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -2746,8 +2746,7 @@ static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed)
static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int reg;
- int ret;
+ int reg, ret;
*an_restart = 0;
@@ -2781,11 +2780,20 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
return 0;
}
- /* Link status is latched low, so read once to clear
- * and then read again to get current state
- */
- reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+ if (reg < 0)
+ return reg;
+
+ /* Link status is latched low so that momentary link drops
+ * can be detected. If link was already down read again
+ * to get the latest state.
+ */
+
+ if (!pdata->phy.link && !(reg & MDIO_STAT1_LSTATUS)) {
+ reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+ if (reg < 0)
+ return reg;
+ }
if (pdata->en_rx_adap) {
/* if the link is available and adaptation is done,
@@ -2804,9 +2812,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
xgbe_phy_set_mode(pdata, phy_data->cur_mode);
}
- /* check again for the link and adaptation status */
- reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
- if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done)
+ if (pdata->rx_adapt_done)
return 1;
} else if (reg & MDIO_STAT1_LSTATUS)
return 1;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 6359bb87dc13..057379cd43ba 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -183,12 +183,12 @@
#define XGBE_LINK_TIMEOUT 5
#define XGBE_KR_TRAINING_WAIT_ITER 50
-#define XGBE_SGMII_AN_LINK_STATUS BIT(1)
+#define XGBE_SGMII_AN_LINK_DUPLEX BIT(1)
#define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3))
#define XGBE_SGMII_AN_LINK_SPEED_10 0x00
#define XGBE_SGMII_AN_LINK_SPEED_100 0x04
#define XGBE_SGMII_AN_LINK_SPEED_1000 0x08
-#define XGBE_SGMII_AN_LINK_DUPLEX BIT(4)
+#define XGBE_SGMII_AN_LINK_STATUS BIT(4)
/* ECC correctable error notification window (seconds) */
#define XGBE_ECC_LIMIT 60
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index cfdb546a09e7..98a4d089270e 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -1861,14 +1861,21 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
break;
}
- buffer_info->alloced = 1;
- buffer_info->skb = skb;
- buffer_info->length = (u16) adapter->rx_buffer_len;
page = virt_to_page(skb->data);
offset = offset_in_page(skb->data);
buffer_info->dma = dma_map_page(&pdev->dev, page, offset,
adapter->rx_buffer_len,
DMA_FROM_DEVICE);
+ if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
+ kfree_skb(skb);
+ adapter->soft_stats.rx_dropped++;
+ break;
+ }
+
+ buffer_info->alloced = 1;
+ buffer_info->skb = skb;
+ buffer_info->length = (u16)adapter->rx_buffer_len;
+
rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
rfd_desc->buf_len = cpu_to_le16(adapter->rx_buffer_len);
rfd_desc->coalese = 0;
@@ -2183,8 +2190,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb,
return 0;
}
-static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
- struct tx_packet_desc *ptpd)
+static bool atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
+ struct tx_packet_desc *ptpd)
{
struct atl1_tpd_ring *tpd_ring = &adapter->tpd_ring;
struct atl1_buffer *buffer_info;
@@ -2194,6 +2201,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
unsigned int nr_frags;
unsigned int f;
int retval;
+ u16 first_mapped;
u16 next_to_use;
u16 data_len;
u8 hdr_len;
@@ -2201,6 +2209,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buf_len -= skb->data_len;
nr_frags = skb_shinfo(skb)->nr_frags;
next_to_use = atomic_read(&tpd_ring->next_to_use);
+ first_mapped = next_to_use;
buffer_info = &tpd_ring->buffer_info[next_to_use];
BUG_ON(buffer_info->skb);
/* put skb in last TPD */
@@ -2216,6 +2225,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buffer_info->dma = dma_map_page(&adapter->pdev->dev, page,
offset, hdr_len,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma))
+ goto dma_err;
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
@@ -2242,6 +2253,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
page, offset,
buffer_info->length,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev,
+ buffer_info->dma))
+ goto dma_err;
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
}
@@ -2254,6 +2268,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buffer_info->dma = dma_map_page(&adapter->pdev->dev, page,
offset, buf_len,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma))
+ goto dma_err;
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
}
@@ -2277,6 +2293,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buffer_info->dma = skb_frag_dma_map(&adapter->pdev->dev,
frag, i * ATL1_MAX_TX_BUF_LEN,
buffer_info->length, DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev,
+ buffer_info->dma))
+ goto dma_err;
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
@@ -2285,6 +2304,22 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
/* last tpd's buffer-info */
buffer_info->skb = skb;
+
+ return true;
+
+ dma_err:
+ while (first_mapped != next_to_use) {
+ buffer_info = &tpd_ring->buffer_info[first_mapped];
+ dma_unmap_page(&adapter->pdev->dev,
+ buffer_info->dma,
+ buffer_info->length,
+ DMA_TO_DEVICE);
+ buffer_info->dma = 0;
+
+ if (++first_mapped == tpd_ring->count)
+ first_mapped = 0;
+ }
+ return false;
}
static void atl1_tx_queue(struct atl1_adapter *adapter, u16 count,
@@ -2355,10 +2390,8 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb,
len = skb_headlen(skb);
- if (unlikely(skb->len <= 0)) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
+ if (unlikely(skb->len <= 0))
+ goto drop_packet;
nr_frags = skb_shinfo(skb)->nr_frags;
for (f = 0; f < nr_frags; f++) {
@@ -2371,10 +2404,9 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb,
if (mss) {
if (skb->protocol == htons(ETH_P_IP)) {
proto_hdr_len = skb_tcp_all_headers(skb);
- if (unlikely(proto_hdr_len > len)) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
+ if (unlikely(proto_hdr_len > len))
+ goto drop_packet;
+
/* need additional TPD ? */
if (proto_hdr_len != len)
count += (len - proto_hdr_len +
@@ -2406,23 +2438,26 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb,
}
tso = atl1_tso(adapter, skb, ptpd);
- if (tso < 0) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
+ if (tso < 0)
+ goto drop_packet;
if (!tso) {
ret_val = atl1_tx_csum(adapter, skb, ptpd);
- if (ret_val < 0) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
+ if (ret_val < 0)
+ goto drop_packet;
}
- atl1_tx_map(adapter, skb, ptpd);
+ if (!atl1_tx_map(adapter, skb, ptpd))
+ goto drop_packet;
+
atl1_tx_queue(adapter, count, ptpd);
atl1_update_mailbox(adapter);
return NETDEV_TX_OK;
+
+drop_packet:
+ adapter->soft_stats.tx_errors++;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
}
static int atl1_rings_clean(struct napi_struct *napi, int budget)
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 773f5ad972a2..6bc8dfdb3d4b 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1864,10 +1864,10 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu)
if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
return -EOPNOTSUPP;
- if (netdev->mtu > enic->port_mtu)
+ if (new_mtu > enic->port_mtu)
netdev_warn(netdev,
"interface MTU (%d) set higher than port MTU (%d)\n",
- netdev->mtu, enic->port_mtu);
+ new_mtu, enic->port_mtu);
return _enic_change_mtu(netdev, new_mtu);
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 2ec2c3dab250..b82f121cadad 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -3939,6 +3939,7 @@ static int dpaa2_eth_setup_rx_flow(struct dpaa2_eth_priv *priv,
MEM_TYPE_PAGE_ORDER0, NULL);
if (err) {
dev_err(dev, "xdp_rxq_info_reg_mem_model failed\n");
+ xdp_rxq_info_unreg(&fq->channel->xdp_rxq);
return err;
}
@@ -4432,17 +4433,25 @@ static int dpaa2_eth_bind_dpni(struct dpaa2_eth_priv *priv)
return -EINVAL;
}
if (err)
- return err;
+ goto out;
}
err = dpni_get_qdid(priv->mc_io, 0, priv->mc_token,
DPNI_QUEUE_TX, &priv->tx_qdid);
if (err) {
dev_err(dev, "dpni_get_qdid() failed\n");
- return err;
+ goto out;
}
return 0;
+
+out:
+ while (i--) {
+ if (priv->fq[i].type == DPAA2_RX_FQ &&
+ xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq))
+ xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq);
+ }
+ return err;
}
/* Allocate rings for storing incoming frame descriptors */
@@ -4825,6 +4834,17 @@ static void dpaa2_eth_del_ch_napi(struct dpaa2_eth_priv *priv)
}
}
+static void dpaa2_eth_free_rx_xdp_rxq(struct dpaa2_eth_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->num_fqs; i++) {
+ if (priv->fq[i].type == DPAA2_RX_FQ &&
+ xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq))
+ xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq);
+ }
+}
+
static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
{
struct device *dev;
@@ -5028,6 +5048,7 @@ err_alloc_percpu_extras:
free_percpu(priv->percpu_stats);
err_alloc_percpu_stats:
dpaa2_eth_del_ch_napi(priv);
+ dpaa2_eth_free_rx_xdp_rxq(priv);
err_bind:
dpaa2_eth_free_dpbps(priv);
err_dpbp_setup:
@@ -5080,6 +5101,7 @@ static void dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
free_percpu(priv->percpu_extras);
dpaa2_eth_del_ch_napi(priv);
+ dpaa2_eth_free_rx_xdp_rxq(priv);
dpaa2_eth_free_dpbps(priv);
dpaa2_eth_free_dpio(priv);
dpaa2_eth_free_dpni(priv);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
index b28991dd1870..48b8e184f3db 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
@@ -96,7 +96,7 @@ static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq)
*/
static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
{
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
/* free ring buffers and the ring itself */
idpf_ctlq_dealloc_ring_res(hw, cq);
@@ -104,8 +104,7 @@ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
/* Set ring_size to 0 to indicate uninitialized queue */
cq->ring_size = 0;
- mutex_unlock(&cq->cq_lock);
- mutex_destroy(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
}
/**
@@ -173,7 +172,7 @@ int idpf_ctlq_add(struct idpf_hw *hw,
idpf_ctlq_init_regs(hw, cq, is_rxq);
- mutex_init(&cq->cq_lock);
+ spin_lock_init(&cq->cq_lock);
list_add(&cq->cq_list, &hw->cq_list_head);
@@ -272,7 +271,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
int err = 0;
int i;
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
/* Ensure there are enough descriptors to send all messages */
num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq);
@@ -332,7 +331,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
wr32(hw, cq->reg.tail, cq->next_to_use);
err_unlock:
- mutex_unlock(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
return err;
}
@@ -364,7 +363,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
if (*clean_count > cq->ring_size)
return -EBADR;
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
ntc = cq->next_to_clean;
@@ -397,7 +396,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
cq->next_to_clean = ntc;
- mutex_unlock(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
/* Return number of descriptors actually cleaned */
*clean_count = i;
@@ -435,7 +434,7 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
if (*buff_count > 0)
buffs_avail = true;
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
if (tbp >= cq->ring_size)
tbp = 0;
@@ -524,7 +523,7 @@ post_buffs_out:
wr32(hw, cq->reg.tail, cq->next_to_post);
}
- mutex_unlock(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
/* return the number of buffers that were not posted */
*buff_count = *buff_count - i;
@@ -552,7 +551,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
u16 i;
/* take the lock before we start messing with the ring */
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
ntc = cq->next_to_clean;
@@ -614,7 +613,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
cq->next_to_clean = ntc;
- mutex_unlock(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
*num_q_msg = i;
if (*num_q_msg == 0)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
index 9642494a67d8..3414c5f9a831 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
@@ -99,7 +99,7 @@ struct idpf_ctlq_info {
enum idpf_ctlq_type cq_type;
int q_id;
- struct mutex cq_lock; /* control queue lock */
+ spinlock_t cq_lock; /* control queue lock */
/* used for interrupt processing */
u16 next_to_use;
u16 next_to_clean;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
index 9bdb309b668e..eaf7a2606faa 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
@@ -47,7 +47,7 @@ static u32 idpf_get_rxfh_key_size(struct net_device *netdev)
struct idpf_vport_user_config_data *user_config;
if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
- return -EOPNOTSUPP;
+ return 0;
user_config = &np->adapter->vport_config[np->vport_idx]->user_config;
@@ -66,7 +66,7 @@ static u32 idpf_get_rxfh_indir_size(struct net_device *netdev)
struct idpf_vport_user_config_data *user_config;
if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
- return -EOPNOTSUPP;
+ return 0;
user_config = &np->adapter->vport_config[np->vport_idx]->user_config;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 4eb20ec2accb..80382ff4a5fa 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -2314,8 +2314,12 @@ void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size)
struct idpf_adapter *adapter = hw->back;
size_t sz = ALIGN(size, 4096);
- mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz,
- &mem->pa, GFP_KERNEL);
+ /* The control queue resources are freed under a spinlock; contiguous
+ * pages will avoid IOMMU remapping and the use of vmap (and vunmap in
+ * the dma_free_*() path).
+ */
+ mem->va = dma_alloc_attrs(&adapter->pdev->dev, sz, &mem->pa,
+ GFP_KERNEL, DMA_ATTR_FORCE_CONTIGUOUS);
mem->size = sz;
return mem->va;
@@ -2330,8 +2334,8 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem)
{
struct idpf_adapter *adapter = hw->back;
- dma_free_coherent(&adapter->pdev->dev, mem->size,
- mem->va, mem->pa);
+ dma_free_attrs(&adapter->pdev->dev, mem->size,
+ mem->va, mem->pa, DMA_ATTR_FORCE_CONTIGUOUS);
mem->size = 0;
mem->va = NULL;
mem->pa = 0;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 686793c539f2..031c332f66c4 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7115,6 +7115,10 @@ static int igc_probe(struct pci_dev *pdev,
adapter->port_num = hw->bus.func;
adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+ /* Disable ASPM L1.2 on I226 devices to avoid packet loss */
+ if (igc_is_device_id_i226(hw))
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
+
err = pci_save_state(pdev);
if (err)
goto err_ioremap;
@@ -7500,6 +7504,9 @@ static int __igc_resume(struct device *dev, bool rpm)
pci_enable_wake(pdev, PCI_D3hot, 0);
pci_enable_wake(pdev, PCI_D3cold, 0);
+ if (igc_is_device_id_i226(hw))
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
+
if (igc_init_interrupt_scheme(adapter, true)) {
netdev_err(netdev, "Unable to allocate memory for queues\n");
return -ENOMEM;
@@ -7625,6 +7632,9 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
pci_enable_wake(pdev, PCI_D3hot, 0);
pci_enable_wake(pdev, PCI_D3cold, 0);
+ if (igc_is_device_id_i226(hw))
+ pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L1_2);
+
/* In case of PCI error, adapter loses its HW address
* so we should re-assign it here.
*/
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index ddca8fc7883e..26119d02a94d 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -3336,7 +3336,7 @@ static int niu_rbr_add_page(struct niu *np, struct rx_ring_info *rp,
addr = np->ops->map_page(np->device, page, 0,
PAGE_SIZE, DMA_FROM_DEVICE);
- if (!addr) {
+ if (np->ops->mapping_error(np->device, addr)) {
__free_page(page);
return -ENOMEM;
}
@@ -6676,6 +6676,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb,
len = skb_headlen(skb);
mapping = np->ops->map_single(np->device, skb->data,
len, DMA_TO_DEVICE);
+ if (np->ops->mapping_error(np->device, mapping))
+ goto out_drop;
prod = rp->prod;
@@ -6717,6 +6719,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb,
mapping = np->ops->map_page(np->device, skb_frag_page(frag),
skb_frag_off(frag), len,
DMA_TO_DEVICE);
+ if (np->ops->mapping_error(np->device, mapping))
+ goto out_unmap;
rp->tx_buffs[prod].skb = NULL;
rp->tx_buffs[prod].mapping = mapping;
@@ -6741,6 +6745,19 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb,
out:
return NETDEV_TX_OK;
+out_unmap:
+ while (i--) {
+ const skb_frag_t *frag;
+
+ prod = PREVIOUS_TX(rp, prod);
+ frag = &skb_shinfo(skb)->frags[i];
+ np->ops->unmap_page(np->device, rp->tx_buffs[prod].mapping,
+ skb_frag_size(frag), DMA_TO_DEVICE);
+ }
+
+ np->ops->unmap_single(np->device, rp->tx_buffs[rp->prod].mapping,
+ skb_headlen(skb), DMA_TO_DEVICE);
+
out_drop:
rp->tx_errors++;
kfree_skb(skb);
@@ -9644,6 +9661,11 @@ static void niu_pci_unmap_single(struct device *dev, u64 dma_address,
dma_unmap_single(dev, dma_address, size, direction);
}
+static int niu_pci_mapping_error(struct device *dev, u64 addr)
+{
+ return dma_mapping_error(dev, addr);
+}
+
static const struct niu_ops niu_pci_ops = {
.alloc_coherent = niu_pci_alloc_coherent,
.free_coherent = niu_pci_free_coherent,
@@ -9651,6 +9673,7 @@ static const struct niu_ops niu_pci_ops = {
.unmap_page = niu_pci_unmap_page,
.map_single = niu_pci_map_single,
.unmap_single = niu_pci_unmap_single,
+ .mapping_error = niu_pci_mapping_error,
};
static void niu_driver_version(void)
@@ -10019,6 +10042,11 @@ static void niu_phys_unmap_single(struct device *dev, u64 dma_address,
/* Nothing to do. */
}
+static int niu_phys_mapping_error(struct device *dev, u64 dma_address)
+{
+ return false;
+}
+
static const struct niu_ops niu_phys_ops = {
.alloc_coherent = niu_phys_alloc_coherent,
.free_coherent = niu_phys_free_coherent,
@@ -10026,6 +10054,7 @@ static const struct niu_ops niu_phys_ops = {
.unmap_page = niu_phys_unmap_page,
.map_single = niu_phys_map_single,
.unmap_single = niu_phys_unmap_single,
+ .mapping_error = niu_phys_mapping_error,
};
static int niu_of_probe(struct platform_device *op)
diff --git a/drivers/net/ethernet/sun/niu.h b/drivers/net/ethernet/sun/niu.h
index 04c215f91fc0..0b169c08b0f2 100644
--- a/drivers/net/ethernet/sun/niu.h
+++ b/drivers/net/ethernet/sun/niu.h
@@ -2879,6 +2879,9 @@ struct tx_ring_info {
#define NEXT_TX(tp, index) \
(((index) + 1) < (tp)->pending ? ((index) + 1) : 0)
+#define PREVIOUS_TX(tp, index) \
+ (((index) - 1) >= 0 ? ((index) - 1) : (((tp)->pending) - 1))
+
static inline u32 niu_tx_avail(struct tx_ring_info *tp)
{
return (tp->pending -
@@ -3140,6 +3143,7 @@ struct niu_ops {
enum dma_data_direction direction);
void (*unmap_single)(struct device *dev, u64 dma_address,
size_t size, enum dma_data_direction direction);
+ int (*mapping_error)(struct device *dev, u64 dma_address);
};
struct niu_link_config {
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index c57cc4f27249..55e252789db3 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -1705,6 +1705,7 @@ static void wx_set_rss_queues(struct wx *wx)
clear_bit(WX_FLAG_FDIR_HASH, wx->flags);
+ wx->ring_feature[RING_F_FDIR].indices = 1;
/* Use Flow Director in addition to RSS to ensure the best
* distribution of flows across cores, even when an FDIR flow
* isn't matched.
@@ -1746,7 +1747,7 @@ static void wx_set_num_queues(struct wx *wx)
*/
static int wx_acquire_msix_vectors(struct wx *wx)
{
- struct irq_affinity affd = { .pre_vectors = 1 };
+ struct irq_affinity affd = { .post_vectors = 1 };
int nvecs, i;
/* We start by asking for one vector per queue pair */
@@ -1783,16 +1784,24 @@ static int wx_acquire_msix_vectors(struct wx *wx)
return nvecs;
}
- wx->msix_entry->entry = 0;
- wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0);
nvecs -= 1;
for (i = 0; i < nvecs; i++) {
wx->msix_q_entries[i].entry = i;
- wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i + 1);
+ wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i);
}
wx->num_q_vectors = nvecs;
+ wx->msix_entry->entry = nvecs;
+ wx->msix_entry->vector = pci_irq_vector(wx->pdev, nvecs);
+
+ if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) {
+ wx->msix_entry->entry = 0;
+ wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0);
+ wx->msix_q_entries[0].entry = 0;
+ wx->msix_q_entries[0].vector = pci_irq_vector(wx->pdev, 1);
+ }
+
return 0;
}
@@ -2291,6 +2300,8 @@ static void wx_set_ivar(struct wx *wx, s8 direction,
if (direction == -1) {
/* other causes */
+ if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags))
+ msix_vector = 0;
msix_vector |= WX_PX_IVAR_ALLOC_VAL;
index = 0;
ivar = rd32(wx, WX_PX_MISC_IVAR);
@@ -2299,8 +2310,6 @@ static void wx_set_ivar(struct wx *wx, s8 direction,
wr32(wx, WX_PX_MISC_IVAR, ivar);
} else {
/* tx or rx causes */
- if (!(wx->mac.type == wx_mac_em && wx->num_vfs == 7))
- msix_vector += 1; /* offset for queue vectors */
msix_vector |= WX_PX_IVAR_ALLOC_VAL;
index = ((16 * (queue & 1)) + (8 * direction));
ivar = rd32(wx, WX_PX_IVAR(queue >> 1));
@@ -2339,7 +2348,7 @@ void wx_write_eitr(struct wx_q_vector *q_vector)
itr_reg |= WX_PX_ITR_CNT_WDIS;
- wr32(wx, WX_PX_ITR(v_idx + 1), itr_reg);
+ wr32(wx, WX_PX_ITR(v_idx), itr_reg);
}
/**
@@ -2392,9 +2401,9 @@ void wx_configure_vectors(struct wx *wx)
wx_write_eitr(q_vector);
}
- wx_set_ivar(wx, -1, 0, 0);
+ wx_set_ivar(wx, -1, 0, v_idx);
if (pdev->msix_enabled)
- wr32(wx, WX_PX_ITR(0), 1950);
+ wr32(wx, WX_PX_ITR(v_idx), 1950);
}
EXPORT_SYMBOL(wx_configure_vectors);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
index e8656d9d733b..c82ae137756c 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
@@ -64,6 +64,7 @@ static void wx_sriov_clear_data(struct wx *wx)
wr32m(wx, WX_PSR_VM_CTL, WX_PSR_VM_CTL_POOL_MASK, 0);
wx->ring_feature[RING_F_VMDQ].offset = 0;
+ clear_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags);
clear_bit(WX_FLAG_SRIOV_ENABLED, wx->flags);
/* Disable VMDq flag so device will be set in NM mode */
if (wx->ring_feature[RING_F_VMDQ].limit == 1)
@@ -78,6 +79,9 @@ static int __wx_enable_sriov(struct wx *wx, u8 num_vfs)
set_bit(WX_FLAG_SRIOV_ENABLED, wx->flags);
dev_info(&wx->pdev->dev, "SR-IOV enabled with %d VFs\n", num_vfs);
+ if (num_vfs == 7 && wx->mac.type == wx_mac_em)
+ set_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags);
+
/* Enable VMDq flag so device will be set in VM mode */
set_bit(WX_FLAG_VMDQ_ENABLED, wx->flags);
if (!wx->ring_feature[RING_F_VMDQ].limit)
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 7730c9fc3e02..c363379126c0 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -1191,6 +1191,7 @@ enum wx_pf_flags {
WX_FLAG_VMDQ_ENABLED,
WX_FLAG_VLAN_PROMISC,
WX_FLAG_SRIOV_ENABLED,
+ WX_FLAG_IRQ_VECTOR_SHARED,
WX_FLAG_FDIR_CAPABLE,
WX_FLAG_FDIR_HASH,
WX_FLAG_FDIR_PERFECT,
@@ -1343,7 +1344,7 @@ struct wx {
};
#define WX_INTR_ALL (~0ULL)
-#define WX_INTR_Q(i) BIT((i) + 1)
+#define WX_INTR_Q(i) BIT((i))
/* register operations */
#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index b5022c49dc5e..e0fc897b0a58 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -161,7 +161,7 @@ static void ngbe_irq_enable(struct wx *wx, bool queues)
if (queues)
wx_intr_enable(wx, NGBE_INTR_ALL);
else
- wx_intr_enable(wx, NGBE_INTR_MISC);
+ wx_intr_enable(wx, NGBE_INTR_MISC(wx));
}
/**
@@ -286,7 +286,7 @@ static int ngbe_request_msix_irqs(struct wx *wx)
* for queue. But when num_vfs == 7, vector[1] is assigned to vf6.
* Misc and queue should reuse interrupt vector[0].
*/
- if (wx->num_vfs == 7)
+ if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags))
err = request_irq(wx->msix_entry->vector,
ngbe_misc_and_queue, 0, netdev->name, wx);
else
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index bb74263f0498..3b2ca7f47e33 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -87,7 +87,7 @@
#define NGBE_PX_MISC_IC_TIMESYNC BIT(11) /* time sync */
#define NGBE_INTR_ALL 0x1FF
-#define NGBE_INTR_MISC BIT(0)
+#define NGBE_INTR_MISC(A) BIT((A)->msix_entry->entry)
#define NGBE_PHY_CONFIG(reg_offset) (0x14000 + ((reg_offset) * 4))
#define NGBE_CFG_LAN_SPEED 0x14440
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c
index 7dbcf41750c1..dc87ccad9652 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c
@@ -294,6 +294,7 @@ static void txgbe_mac_link_up_aml(struct phylink_config *config,
wx_fc_enable(wx, tx_pause, rx_pause);
txgbe_reconfig_mac(wx);
+ txgbe_enable_sec_tx_path(wx);
txcfg = rd32(wx, TXGBE_AML_MAC_TX_CFG);
txcfg &= ~TXGBE_AML_MAC_TX_CFG_SPEED_MASK;
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
index 20b9a28bcb55..3885283681ec 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
@@ -31,7 +31,7 @@ void txgbe_irq_enable(struct wx *wx, bool queues)
wr32(wx, WX_PX_MISC_IEN, misc_ien);
/* unmask interrupt */
- wx_intr_enable(wx, TXGBE_INTR_MISC);
+ wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
if (queues)
wx_intr_enable(wx, TXGBE_INTR_QALL(wx));
}
@@ -78,7 +78,6 @@ free_queue_irqs:
free_irq(wx->msix_q_entries[vector].vector,
wx->q_vector[vector]);
}
- wx_reset_interrupt_capability(wx);
return err;
}
@@ -132,7 +131,7 @@ static irqreturn_t txgbe_misc_irq_handle(int irq, void *data)
txgbe->eicr = eicr;
if (eicr & TXGBE_PX_MISC_IC_VF_MBOX) {
wx_msg_task(txgbe->wx);
- wx_intr_enable(wx, TXGBE_INTR_MISC);
+ wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
}
return IRQ_WAKE_THREAD;
}
@@ -184,7 +183,7 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
nhandled++;
}
- wx_intr_enable(wx, TXGBE_INTR_MISC);
+ wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
}
@@ -211,6 +210,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe)
free_irq(txgbe->link_irq, txgbe);
free_irq(txgbe->misc.irq, txgbe);
txgbe_del_irq_domain(txgbe);
+ txgbe->wx->misc_irq_domain = false;
}
int txgbe_setup_misc_irq(struct txgbe *txgbe)
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index f3d2778b8e35..a5867f3c93fc 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -458,10 +458,14 @@ static int txgbe_open(struct net_device *netdev)
wx_configure(wx);
- err = txgbe_request_queue_irqs(wx);
+ err = txgbe_setup_misc_irq(wx->priv);
if (err)
goto err_free_resources;
+ err = txgbe_request_queue_irqs(wx);
+ if (err)
+ goto err_free_misc_irq;
+
/* Notify the stack of the actual queue counts. */
err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues);
if (err)
@@ -479,6 +483,9 @@ static int txgbe_open(struct net_device *netdev)
err_free_irq:
wx_free_irq(wx);
+err_free_misc_irq:
+ txgbe_free_misc_irq(wx->priv);
+ wx_reset_interrupt_capability(wx);
err_free_resources:
wx_free_resources(wx);
err_reset:
@@ -519,6 +526,7 @@ static int txgbe_close(struct net_device *netdev)
wx_ptp_stop(wx);
txgbe_down(wx);
wx_free_irq(wx);
+ txgbe_free_misc_irq(wx->priv);
wx_free_resources(wx);
txgbe_fdir_filter_exit(wx);
wx_control_hw(wx, false);
@@ -564,7 +572,6 @@ static void txgbe_shutdown(struct pci_dev *pdev)
int txgbe_setup_tc(struct net_device *dev, u8 tc)
{
struct wx *wx = netdev_priv(dev);
- struct txgbe *txgbe = wx->priv;
/* Hardware has to reinitialize queues and interrupts to
* match packet buffer alignment. Unfortunately, the
@@ -575,7 +582,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc)
else
txgbe_reset(wx);
- txgbe_free_misc_irq(txgbe);
wx_clear_interrupt_scheme(wx);
if (tc)
@@ -584,7 +590,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc)
netdev_reset_tc(dev);
wx_init_interrupt_scheme(wx);
- txgbe_setup_misc_irq(txgbe);
if (netif_running(dev))
txgbe_open(dev);
@@ -882,13 +887,9 @@ static int txgbe_probe(struct pci_dev *pdev,
txgbe_init_fdir(txgbe);
- err = txgbe_setup_misc_irq(txgbe);
- if (err)
- goto err_release_hw;
-
err = txgbe_init_phy(txgbe);
if (err)
- goto err_free_misc_irq;
+ goto err_release_hw;
err = register_netdev(netdev);
if (err)
@@ -916,8 +917,6 @@ static int txgbe_probe(struct pci_dev *pdev,
err_remove_phy:
txgbe_remove_phy(txgbe);
-err_free_misc_irq:
- txgbe_free_misc_irq(txgbe);
err_release_hw:
wx_clear_interrupt_scheme(wx);
wx_control_hw(wx, false);
@@ -957,7 +956,6 @@ static void txgbe_remove(struct pci_dev *pdev)
unregister_netdev(netdev);
txgbe_remove_phy(txgbe);
- txgbe_free_misc_irq(txgbe);
wx_free_isb_resources(wx);
pci_release_selected_regions(pdev,
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index 42ec815159e8..41915d7dd372 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -302,8 +302,8 @@ struct txgbe_fdir_filter {
#define TXGBE_DEFAULT_RX_WORK 128
#endif
-#define TXGBE_INTR_MISC BIT(0)
-#define TXGBE_INTR_QALL(A) GENMASK((A)->num_q_vectors, 1)
+#define TXGBE_INTR_MISC(A) BIT((A)->num_q_vectors)
+#define TXGBE_INTR_QALL(A) (TXGBE_INTR_MISC(A) - 1)
#define TXGBE_MAX_EITR GENMASK(11, 3)
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index f53e255116ea..e3ca6e91efe1 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -4567,8 +4567,6 @@ static void lan78xx_disconnect(struct usb_interface *intf)
if (!dev)
return;
- netif_napi_del(&dev->napi);
-
udev = interface_to_usbdev(intf);
net = dev->net;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e53ba600605a..5d674eb9a0f2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -778,6 +778,26 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
+static int check_mergeable_len(struct net_device *dev, void *mrg_ctx,
+ unsigned int len)
+{
+ unsigned int headroom, tailroom, room, truesize;
+
+ truesize = mergeable_ctx_to_truesize(mrg_ctx);
+ headroom = mergeable_ctx_to_headroom(mrg_ctx);
+ tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+ room = SKB_DATA_ALIGN(headroom + tailroom);
+
+ if (len > truesize - room) {
+ pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+ dev->name, len, (unsigned long)(truesize - room));
+ DEV_STATS_INC(dev, rx_length_errors);
+ return -1;
+ }
+
+ return 0;
+}
+
static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
unsigned int headroom,
unsigned int len)
@@ -1084,7 +1104,7 @@ static bool tx_may_stop(struct virtnet_info *vi,
* Since most packets only take 1 or 2 ring slots, stopping the queue
* early means 16 slots are typically wasted.
*/
- if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
+ if (sq->vq->num_free < MAX_SKB_FRAGS + 2) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
netif_tx_stop_queue(txq);
@@ -1116,7 +1136,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
/* More just got used, free them then recheck. */
free_old_xmit(sq, txq, false);
- if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
+ if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
netif_start_subqueue(dev, qnum);
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.wake);
@@ -1127,15 +1147,29 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
}
}
+/* Note that @len is the length of received data without virtio header */
static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
- struct receive_queue *rq, void *buf, u32 len)
+ struct receive_queue *rq, void *buf,
+ u32 len, bool first_buf)
{
struct xdp_buff *xdp;
u32 bufsize;
xdp = (struct xdp_buff *)buf;
- bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
+ /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for
+ * the virtio header and ask the vhost to fill data starting from
+ * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len
+ * The first buffer carries the virtio header, so the region left for
+ * frame data is
+ * xsk_pool_get_rx_frame_size()
+ * Buffers after the first one carry no virtio header, so the maximum
+ * frame data length for them is
+ * xsk_pool_get_rx_frame_size() + vi->hdr_len
+ */
+ bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool);
+ if (!first_buf)
+ bufsize += vi->hdr_len;
if (unlikely(len > bufsize)) {
pr_debug("%s: rx error: len %u exceeds truesize %u\n",
@@ -1260,7 +1294,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi,
u64_stats_add(&stats->bytes, len);
- xdp = buf_to_xdp(vi, rq, buf, len);
+ xdp = buf_to_xdp(vi, rq, buf, len, false);
if (!xdp)
goto err;
@@ -1358,7 +1392,7 @@ static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queu
u64_stats_add(&stats->bytes, len);
- xdp = buf_to_xdp(vi, rq, buf, len);
+ xdp = buf_to_xdp(vi, rq, buf, len, true);
if (!xdp)
return;
@@ -1797,7 +1831,8 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
* across multiple buffers (num_buf > 1), and we make sure buffers
* have enough headroom.
*/
-static struct page *xdp_linearize_page(struct receive_queue *rq,
+static struct page *xdp_linearize_page(struct net_device *dev,
+ struct receive_queue *rq,
int *num_buf,
struct page *p,
int offset,
@@ -1817,18 +1852,27 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
page_off += *len;
+ /* Only mergeable mode can enter this while loop. In small mode,
+ * *num_buf == 1, so the loop body is never executed.
+ */
while (--*num_buf) {
unsigned int buflen;
void *buf;
+ void *ctx;
int off;
- buf = virtnet_rq_get_buf(rq, &buflen, NULL);
+ buf = virtnet_rq_get_buf(rq, &buflen, &ctx);
if (unlikely(!buf))
goto err_buf;
p = virt_to_head_page(buf);
off = buf - page_address(p);
+ if (check_mergeable_len(dev, ctx, buflen)) {
+ put_page(p);
+ goto err_buf;
+ }
+
/* guard against a misconfigured or uncooperative backend that
* is sending packet larger than the MTU.
*/
@@ -1917,7 +1961,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
headroom = vi->hdr_len + header_offset;
buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- xdp_page = xdp_linearize_page(rq, &num_buf, page,
+ xdp_page = xdp_linearize_page(dev, rq, &num_buf, page,
offset, header_offset,
&tlen);
if (!xdp_page)
@@ -2126,10 +2170,9 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
struct virtnet_rq_stats *stats)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
- unsigned int headroom, tailroom, room;
- unsigned int truesize, cur_frag_size;
struct skb_shared_info *shinfo;
unsigned int xdp_frags_truesz = 0;
+ unsigned int truesize;
struct page *page;
skb_frag_t *frag;
int offset;
@@ -2172,21 +2215,14 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
page = virt_to_head_page(buf);
offset = buf - page_address(page);
- truesize = mergeable_ctx_to_truesize(ctx);
- headroom = mergeable_ctx_to_headroom(ctx);
- tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
- room = SKB_DATA_ALIGN(headroom + tailroom);
-
- cur_frag_size = truesize;
- xdp_frags_truesz += cur_frag_size;
- if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
+ if (check_mergeable_len(dev, ctx, len)) {
put_page(page);
- pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
- dev->name, len, (unsigned long)(truesize - room));
- DEV_STATS_INC(dev, rx_length_errors);
goto err;
}
+ truesize = mergeable_ctx_to_truesize(ctx);
+ xdp_frags_truesz += truesize;
+
frag = &shinfo->frags[shinfo->nr_frags++];
skb_frag_fill_page_desc(frag, page, offset, len);
if (page_is_pfmemalloc(page))
@@ -2252,7 +2288,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
*/
if (!xdp_prog->aux->xdp_has_frags) {
/* linearize data for XDP */
- xdp_page = xdp_linearize_page(rq, num_buf,
+ xdp_page = xdp_linearize_page(vi->dev, rq, num_buf,
*page, offset,
XDP_PACKET_HEADROOM,
len);
@@ -2400,18 +2436,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct sk_buff *head_skb, *curr_skb;
unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
- unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
- unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
head_skb = NULL;
u64_stats_add(&stats->bytes, len - vi->hdr_len);
- if (unlikely(len > truesize - room)) {
- pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
- dev->name, len, (unsigned long)(truesize - room));
- DEV_STATS_INC(dev, rx_length_errors);
+ if (check_mergeable_len(dev, ctx, len))
goto err_skb;
- }
if (unlikely(vi->xdp_enabled)) {
struct bpf_prog *xdp_prog;
@@ -2446,17 +2476,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
u64_stats_add(&stats->bytes, len);
page = virt_to_head_page(buf);
- truesize = mergeable_ctx_to_truesize(ctx);
- headroom = mergeable_ctx_to_headroom(ctx);
- tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
- room = SKB_DATA_ALIGN(headroom + tailroom);
- if (unlikely(len > truesize - room)) {
- pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
- dev->name, len, (unsigned long)(truesize - room));
- DEV_STATS_INC(dev, rx_length_errors);
+ if (check_mergeable_len(dev, ctx, len))
goto err_skb;
- }
+ truesize = mergeable_ctx_to_truesize(ctx);
curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
buf, len, truesize);
if (!curr_skb)
@@ -2998,7 +3021,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
free_old_xmit(sq, txq, !!budget);
} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
- if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
+ if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
if (netif_tx_queue_stopped(txq)) {
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.wake);
@@ -3195,7 +3218,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
else
free_old_xmit(sq, txq, !!budget);
- if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
+ if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
if (netif_tx_queue_stopped(txq)) {
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.wake);
@@ -3481,6 +3504,12 @@ static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
{
int qindex, err;
+ if (ring_num <= MAX_SKB_FRAGS + 2) {
+ netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n",
+ ring_num, MAX_SKB_FRAGS + 2);
+ return -EINVAL;
+ }
+
qindex = sq - vi->sq;
virtnet_tx_pause(vi, sq);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e533d791955d..7493e5aa984c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -386,7 +386,7 @@ static void nvme_log_err_passthru(struct request *req)
nr->cmd->common.cdw12,
nr->cmd->common.cdw13,
nr->cmd->common.cdw14,
- nr->cmd->common.cdw14);
+ nr->cmd->common.cdw15);
}
enum nvme_disposition {
@@ -4086,6 +4086,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
struct nvme_ns *ns;
struct gendisk *disk;
int node = ctrl->numa_node;
+ bool last_path = false;
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
@@ -4178,9 +4179,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
- if (list_empty(&ns->head->list))
+ if (list_empty(&ns->head->list)) {
list_del_init(&ns->head->entry);
+ /*
+ * If multipath is not configured, we still create a namespace
+ * head (nshead), but head->disk is not initialized in that
+ * case. As a result, only a single reference to nshead is held
+ * (via kref_init()) when it is created. Therefore, ensure that
+ * we do not release the reference to nshead twice if head->disk
+ * is not present.
+ */
+ if (ns->head->disk)
+ last_path = true;
+ }
mutex_unlock(&ctrl->subsys->lock);
+ if (last_path)
+ nvme_put_ns_head(ns->head);
nvme_put_ns_head(ns->head);
out_cleanup_disk:
put_disk(disk);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 316a269842fa..3da980dc60d9 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -690,8 +690,8 @@ static void nvme_remove_head(struct nvme_ns_head *head)
nvme_cdev_del(&head->cdev, &head->cdev_device);
synchronize_srcu(&head->srcu);
del_gendisk(head->disk);
- nvme_put_ns_head(head);
}
+ nvme_put_ns_head(head);
}
static void nvme_remove_head_work(struct work_struct *work)
@@ -1200,7 +1200,8 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
*/
srcu_idx = srcu_read_lock(&head->srcu);
- list_for_each_entry_rcu(ns, &head->list, siblings) {
+ list_for_each_entry_srcu(ns, &head->list, siblings,
+ srcu_read_lock_held(&head->srcu)) {
/*
* Ensure that ns path disk node is already added otherwise we
* may get invalid kobj name for target
@@ -1291,6 +1292,9 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
bool remove = false;
+ if (!head->disk)
+ return;
+
mutex_lock(&head->subsys->lock);
/*
* We are called when all paths have been removed, and at that point
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8ff12e415cb5..320aaa41ec39 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2101,8 +2101,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
pci_p2pmem_publish(pdev, true);
-
- nvme_update_attrs(dev);
}
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
@@ -3010,6 +3008,8 @@ static void nvme_reset_work(struct work_struct *work)
if (result < 0)
goto out;
+ nvme_update_attrs(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out;
@@ -3343,6 +3343,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result < 0)
goto out_disable;
+ nvme_update_attrs(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out_disable;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index df69a9dee71c..51df72f5e89b 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -867,6 +867,8 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio)
{
if (bio != &req->b.inline_bio)
bio_put(bio);
+ else
+ bio_uninit(bio);
}
#ifdef CONFIG_NVME_TARGET_TCP_TLS
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 8172869bd3d7..0743c6acd6e2 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -692,8 +692,12 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
{
u8 irqstat;
u8 rtc_control;
+ unsigned long flags;
- spin_lock(&rtc_lock);
+ /* We cannot use spin_lock() here, as cmos_interrupt() is also called
+ * in a non-irq context.
+ */
+ spin_lock_irqsave(&rtc_lock, flags);
/* When the HPET interrupt handler calls us, the interrupt
* status is passed as arg1 instead of the irq number. But
@@ -727,7 +731,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
hpet_mask_rtc_irq_bit(RTC_AIE);
CMOS_READ(RTC_INTR_FLAGS);
}
- spin_unlock(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
if (is_intr(irqstat)) {
rtc_update_irq(p, 1, irqstat);
@@ -1295,9 +1299,7 @@ static void cmos_check_wkalrm(struct device *dev)
* ACK the rtc irq here
*/
if (t_now >= cmos->alarm_expires && cmos_use_acpi_alarm()) {
- local_irq_disable();
cmos_interrupt(0, (void *)cmos->rtc);
- local_irq_enable();
return;
}
diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
index 31c7dca8f469..2e1ac0c42e93 100644
--- a/drivers/rtc/rtc-pcf2127.c
+++ b/drivers/rtc/rtc-pcf2127.c
@@ -1538,7 +1538,12 @@ static int pcf2127_spi_probe(struct spi_device *spi)
variant = &pcf21xx_cfg[type];
}
- config.max_register = variant->max_register,
+ if (variant->type == PCF2131) {
+ config.read_flag_mask = 0x0;
+ config.write_flag_mask = 0x0;
+ }
+
+ config.max_register = variant->max_register;
regmap = devm_regmap_init_spi(spi, &config);
if (IS_ERR(regmap)) {
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index db5c9b641277..a7220b4d0e8d 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/i2c.h>
#include <linux/bcd.h>
+#include <linux/reboot.h>
#include <linux/regmap.h>
#include <linux/rtc.h>
#include <linux/platform_device.h>
@@ -53,6 +54,7 @@ enum {
* Device | Write time | Read time | Write alarm
* =================================================
* S5M8767 | UDR + TIME | | UDR
+ * S2MPG10 | WUDR | RUDR | AUDR
* S2MPS11/14 | WUDR | RUDR | WUDR + RUDR
* S2MPS13 | WUDR | RUDR | WUDR + AUDR
* S2MPS15 | WUDR | RUDR | AUDR
@@ -99,6 +101,20 @@ static const struct s5m_rtc_reg_config s5m_rtc_regs = {
.write_alarm_udr_mask = S5M_RTC_UDR_MASK,
};
+/* Register map for S2MPG10 */
+static const struct s5m_rtc_reg_config s2mpg10_rtc_regs = {
+ .regs_count = 7,
+ .time = S2MPG10_RTC_SEC,
+ .ctrl = S2MPG10_RTC_CTRL,
+ .alarm0 = S2MPG10_RTC_A0SEC,
+ .alarm1 = S2MPG10_RTC_A1SEC,
+ .udr_update = S2MPG10_RTC_UPDATE,
+ .autoclear_udr_mask = S2MPS15_RTC_WUDR_MASK | S2MPS15_RTC_AUDR_MASK,
+ .read_time_udr_mask = S2MPS_RTC_RUDR_MASK,
+ .write_time_udr_mask = S2MPS15_RTC_WUDR_MASK,
+ .write_alarm_udr_mask = S2MPS15_RTC_AUDR_MASK,
+};
+
/* Register map for S2MPS13 */
static const struct s5m_rtc_reg_config s2mps13_rtc_regs = {
.regs_count = 7,
@@ -227,8 +243,8 @@ static int s5m8767_wait_for_udr_update(struct s5m_rtc_info *info)
return ret;
}
-static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info,
- struct rtc_wkalrm *alarm)
+static int s5m_check_pending_alarm_interrupt(struct s5m_rtc_info *info,
+ struct rtc_wkalrm *alarm)
{
int ret;
unsigned int val;
@@ -238,6 +254,7 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info,
ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val);
val &= S5M_ALARM0_STATUS;
break;
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -262,17 +279,9 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info,
static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
{
int ret;
- unsigned int data;
- ret = regmap_read(info->regmap, info->regs->udr_update, &data);
- if (ret < 0) {
- dev_err(info->dev, "failed to read update reg(%d)\n", ret);
- return ret;
- }
-
- data |= info->regs->write_time_udr_mask;
-
- ret = regmap_write(info->regmap, info->regs->udr_update, data);
+ ret = regmap_set_bits(info->regmap, info->regs->udr_update,
+ info->regs->write_time_udr_mask);
if (ret < 0) {
dev_err(info->dev, "failed to write update reg(%d)\n", ret);
return ret;
@@ -286,20 +295,14 @@ static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
{
int ret;
- unsigned int data;
+ unsigned int udr_mask;
- ret = regmap_read(info->regmap, info->regs->udr_update, &data);
- if (ret < 0) {
- dev_err(info->dev, "%s: fail to read update reg(%d)\n",
- __func__, ret);
- return ret;
- }
-
- data |= info->regs->write_alarm_udr_mask;
+ udr_mask = info->regs->write_alarm_udr_mask;
switch (info->device_type) {
case S5M8767X:
- data &= ~S5M_RTC_TIME_EN_MASK;
+ udr_mask |= S5M_RTC_TIME_EN_MASK;
break;
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -309,7 +312,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
return -EINVAL;
}
- ret = regmap_write(info->regmap, info->regs->udr_update, data);
+ ret = regmap_update_bits(info->regmap, info->regs->udr_update,
+ udr_mask, info->regs->write_alarm_udr_mask);
if (ret < 0) {
dev_err(info->dev, "%s: fail to write update reg(%d)\n",
__func__, ret);
@@ -320,8 +324,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
/* On S2MPS13 the AUDR is not auto-cleared */
if (info->device_type == S2MPS13X)
- regmap_update_bits(info->regmap, info->regs->udr_update,
- S2MPS13_RTC_AUDR_MASK, 0);
+ regmap_clear_bits(info->regmap, info->regs->udr_update,
+ S2MPS13_RTC_AUDR_MASK);
return ret;
}
@@ -333,10 +337,8 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
int ret;
if (info->regs->read_time_udr_mask) {
- ret = regmap_update_bits(info->regmap,
- info->regs->udr_update,
- info->regs->read_time_udr_mask,
- info->regs->read_time_udr_mask);
+ ret = regmap_set_bits(info->regmap, info->regs->udr_update,
+ info->regs->read_time_udr_mask);
if (ret) {
dev_err(dev,
"Failed to prepare registers for time reading: %d\n",
@@ -351,6 +353,7 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -374,6 +377,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -411,6 +415,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -430,7 +435,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
dev_dbg(dev, "%s: %ptR(%d)\n", __func__, &alrm->time, alrm->time.tm_wday);
- return s5m_check_peding_alarm_interrupt(info, alrm);
+ return s5m_check_pending_alarm_interrupt(info, alrm);
}
static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
@@ -449,6 +454,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -487,6 +493,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -524,6 +531,7 @@ static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -604,6 +612,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
ret = regmap_raw_write(info->regmap, S5M_ALARM0_CONF, data, 2);
break;
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -634,59 +643,92 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
return ret;
}
+static int s5m_rtc_restart_s2mpg10(struct sys_off_data *data)
+{
+ struct s5m_rtc_info *info = data->cb_data;
+ int ret;
+
+ if (data->mode != REBOOT_COLD && data->mode != REBOOT_HARD)
+ return NOTIFY_DONE;
+
+ /*
+ * Arm watchdog with maximum timeout (2 seconds), and perform full reset
+ * on expiry.
+ */
+ ret = regmap_set_bits(info->regmap, S2MPG10_RTC_WTSR,
+ (S2MPG10_WTSR_COLDTIMER | S2MPG10_WTSR_COLDRST
+ | S2MPG10_WTSR_WTSRT | S2MPG10_WTSR_WTSR_EN));
+
+ return ret ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
static int s5m_rtc_probe(struct platform_device *pdev)
{
struct sec_pmic_dev *s5m87xx = dev_get_drvdata(pdev->dev.parent);
+ enum sec_device_type device_type =
+ platform_get_device_id(pdev)->driver_data;
struct s5m_rtc_info *info;
- struct i2c_client *i2c;
- const struct regmap_config *regmap_cfg;
int ret, alarm_irq;
info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
- switch (platform_get_device_id(pdev)->driver_data) {
- case S2MPS15X:
- regmap_cfg = &s2mps14_rtc_regmap_config;
- info->regs = &s2mps15_rtc_regs;
- alarm_irq = S2MPS14_IRQ_RTCA0;
- break;
- case S2MPS14X:
- regmap_cfg = &s2mps14_rtc_regmap_config;
- info->regs = &s2mps14_rtc_regs;
- alarm_irq = S2MPS14_IRQ_RTCA0;
- break;
- case S2MPS13X:
- regmap_cfg = &s2mps14_rtc_regmap_config;
- info->regs = &s2mps13_rtc_regs;
- alarm_irq = S2MPS14_IRQ_RTCA0;
- break;
- case S5M8767X:
- regmap_cfg = &s5m_rtc_regmap_config;
- info->regs = &s5m_rtc_regs;
- alarm_irq = S5M8767_IRQ_RTCA1;
- break;
- default:
- return dev_err_probe(&pdev->dev, -ENODEV,
- "Device type %lu is not supported by RTC driver\n",
- platform_get_device_id(pdev)->driver_data);
- }
+ info->regmap = dev_get_regmap(pdev->dev.parent, "rtc");
+ if (!info->regmap) {
+ const struct regmap_config *regmap_cfg;
+ struct i2c_client *i2c;
- i2c = devm_i2c_new_dummy_device(&pdev->dev, s5m87xx->i2c->adapter,
- RTC_I2C_ADDR);
- if (IS_ERR(i2c))
- return dev_err_probe(&pdev->dev, PTR_ERR(i2c),
- "Failed to allocate I2C for RTC\n");
+ switch (device_type) {
+ case S2MPS15X:
+ regmap_cfg = &s2mps14_rtc_regmap_config;
+ info->regs = &s2mps15_rtc_regs;
+ alarm_irq = S2MPS14_IRQ_RTCA0;
+ break;
+ case S2MPS14X:
+ regmap_cfg = &s2mps14_rtc_regmap_config;
+ info->regs = &s2mps14_rtc_regs;
+ alarm_irq = S2MPS14_IRQ_RTCA0;
+ break;
+ case S2MPS13X:
+ regmap_cfg = &s2mps14_rtc_regmap_config;
+ info->regs = &s2mps13_rtc_regs;
+ alarm_irq = S2MPS14_IRQ_RTCA0;
+ break;
+ case S5M8767X:
+ regmap_cfg = &s5m_rtc_regmap_config;
+ info->regs = &s5m_rtc_regs;
+ alarm_irq = S5M8767_IRQ_RTCA1;
+ break;
+ default:
+ return dev_err_probe(&pdev->dev, -ENODEV,
+ "Unsupported device type %d\n",
+ device_type);
+ }
- info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg);
- if (IS_ERR(info->regmap))
- return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap),
- "Failed to allocate RTC register map\n");
+ i2c = devm_i2c_new_dummy_device(&pdev->dev,
+ s5m87xx->i2c->adapter,
+ RTC_I2C_ADDR);
+ if (IS_ERR(i2c))
+ return dev_err_probe(&pdev->dev, PTR_ERR(i2c),
+ "Failed to allocate I2C\n");
+
+ info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg);
+ if (IS_ERR(info->regmap))
+ return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap),
+ "Failed to allocate regmap\n");
+ } else if (device_type == S2MPG10) {
+ info->regs = &s2mpg10_rtc_regs;
+ alarm_irq = S2MPG10_IRQ_RTCA0;
+ } else {
+ return dev_err_probe(&pdev->dev, -ENODEV,
+ "Unsupported device type %d\n",
+ device_type);
+ }
info->dev = &pdev->dev;
info->s5m87xx = s5m87xx;
- info->device_type = platform_get_device_id(pdev)->driver_data;
+ info->device_type = device_type;
if (s5m87xx->irq_data) {
info->irq = regmap_irq_get_virq(s5m87xx->irq_data, alarm_irq);
@@ -721,7 +763,23 @@ static int s5m_rtc_probe(struct platform_device *pdev)
return dev_err_probe(&pdev->dev, ret,
"Failed to request alarm IRQ %d\n",
info->irq);
- device_init_wakeup(&pdev->dev, true);
+
+ ret = devm_device_init_wakeup(&pdev->dev);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret,
+ "Failed to init wakeup\n");
+ }
+
+ if (of_device_is_system_power_controller(pdev->dev.parent->of_node) &&
+ info->device_type == S2MPG10) {
+ ret = devm_register_sys_off_handler(&pdev->dev,
+ SYS_OFF_MODE_RESTART,
+ SYS_OFF_PRIO_HIGH + 1,
+ s5m_rtc_restart_s2mpg10,
+ info);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret,
+ "Failed to register restart handler\n");
}
return devm_rtc_register_device(info->rtc_dev);
@@ -755,6 +813,7 @@ static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume);
static const struct platform_device_id s5m_rtc_id[] = {
{ "s5m-rtc", S5M8767X },
+ { "s2mpg10-rtc", S2MPG10 },
{ "s2mps13-rtc", S2MPS13X },
{ "s2mps14-rtc", S2MPS14X },
{ "s2mps15-rtc", S2MPS15X },
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index e021f1106bea..cc5d05dc395c 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -473,10 +473,17 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv
else
shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS;
- if (sht->max_segment_size)
- shost->max_segment_size = sht->max_segment_size;
- else
- shost->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+ shost->virt_boundary_mask = sht->virt_boundary_mask;
+ if (shost->virt_boundary_mask) {
+ WARN_ON_ONCE(sht->max_segment_size &&
+ sht->max_segment_size != UINT_MAX);
+ shost->max_segment_size = UINT_MAX;
+ } else {
+ if (sht->max_segment_size)
+ shost->max_segment_size = sht->max_segment_size;
+ else
+ shost->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+ }
/* 32-byte (dword) is a common minimum for HBAs. */
if (sht->dma_alignment)
@@ -492,9 +499,6 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv
else
shost->dma_boundary = 0xffffffff;
- if (sht->virt_boundary_mask)
- shost->virt_boundary_mask = sht->virt_boundary_mask;
-
device_initialize(&shost->shost_gendev);
dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
shost->shost_gendev.bus = &scsi_bus_type;
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 0cd6f3e14882..13b6cb1b93ac 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -2147,7 +2147,7 @@ qla24xx_get_port_database(scsi_qla_host_t *vha, u16 nport_handle,
pdb_dma = dma_map_single(&vha->hw->pdev->dev, pdb,
sizeof(*pdb), DMA_FROM_DEVICE);
- if (!pdb_dma) {
+ if (dma_mapping_error(&vha->hw->pdev->dev, pdb_dma)) {
ql_log(ql_log_warn, vha, 0x1116, "Failed to map dma buffer.\n");
return QLA_MEMORY_ALLOC_FAILED;
}
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index d4141656b204..a39f1da4ce47 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -3420,6 +3420,8 @@ static int qla4xxx_alloc_pdu(struct iscsi_task *task, uint8_t opcode)
task_data->data_dma = dma_map_single(&ha->pdev->dev, task->data,
task->data_count,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&ha->pdev->dev, task_data->data_dma))
+ return -ENOMEM;
}
DEBUG2(ql4_printk(KERN_INFO, ha, "%s: MaxRecvLen %u, iscsi hrd %d\n",
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3f6e87705b62..eeaa6af294b8 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3384,7 +3384,7 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp)
rcu_read_lock();
vpd = rcu_dereference(sdkp->device->vpd_pgb7);
- if (vpd && vpd->len >= 2)
+ if (vpd && vpd->len >= 6)
sdkp->rscs = vpd->data[5] & 1;
rcu_read_unlock();
}
diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c
index de8b6acd4058..fcb4b14a710f 100644
--- a/drivers/ufs/core/ufs-sysfs.c
+++ b/drivers/ufs/core/ufs-sysfs.c
@@ -1808,7 +1808,7 @@ UFS_UNIT_DESC_PARAM(logical_block_size, _LOGICAL_BLK_SIZE, 1);
UFS_UNIT_DESC_PARAM(logical_block_count, _LOGICAL_BLK_COUNT, 8);
UFS_UNIT_DESC_PARAM(erase_block_size, _ERASE_BLK_SIZE, 4);
UFS_UNIT_DESC_PARAM(provisioning_type, _PROVISIONING_TYPE, 1);
-UFS_UNIT_DESC_PARAM(physical_memory_resourse_count, _PHY_MEM_RSRC_CNT, 8);
+UFS_UNIT_DESC_PARAM(physical_memory_resource_count, _PHY_MEM_RSRC_CNT, 8);
UFS_UNIT_DESC_PARAM(context_capabilities, _CTX_CAPABILITIES, 2);
UFS_UNIT_DESC_PARAM(large_unit_granularity, _LARGE_UNIT_SIZE_M1, 1);
UFS_UNIT_DESC_PARAM(wb_buf_alloc_units, _WB_BUF_ALLOC_UNITS, 4);
@@ -1825,7 +1825,7 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = {
&dev_attr_logical_block_count.attr,
&dev_attr_erase_block_size.attr,
&dev_attr_provisioning_type.attr,
- &dev_attr_physical_memory_resourse_count.attr,
+ &dev_attr_physical_memory_resource_count.attr,
&dev_attr_context_capabilities.attr,
&dev_attr_large_unit_granularity.attr,
&dev_attr_wb_buf_alloc_units.attr,
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b784aab66867..4397392bfef0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2797,7 +2797,7 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
void (*recycle_done)(struct virtqueue *vq))
{
struct vring_virtqueue *vq = to_vvq(_vq);
- int err;
+ int err, err_reset;
if (num > vq->vq.num_max)
return -E2BIG;
@@ -2819,7 +2819,11 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
else
err = virtqueue_resize_split(_vq, num);
- return virtqueue_enable_after_reset(_vq);
+ err_reset = virtqueue_enable_after_reset(_vq);
+ if (err_reset)
+ return err_reset;
+
+ return err;
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e51e7d88980a..1d847a939f29 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = {
.kill_sb = kill_anon_super,
};
-static struct inode *anon_inode_make_secure_inode(
- const char *name,
- const struct inode *context_inode)
+/**
+ * anon_inode_make_secure_inode - allocate an anonymous inode with security context
+ * @sb: [in] Superblock to allocate from
+ * @name: [in] Name of the class of the new file (e.g., "secretmem")
+ * @context_inode:
+ * [in] Optional parent inode for security inheritance
+ *
+ * The function ensures proper security initialization through the LSM hook
+ * security_inode_init_security_anon().
+ *
+ * Return: Pointer to new inode on success, ERR_PTR on failure.
+ */
+struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
+ const struct inode *context_inode)
{
struct inode *inode;
int error;
- inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
+ inode = alloc_anon_inode(sb);
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
@@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode(
}
return inode;
}
+EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
static struct file *__anon_inode_getfile(const char *name,
const struct file_operations *fops,
@@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name,
return ERR_PTR(-ENOENT);
if (make_inode) {
- inode = anon_inode_make_secure_inode(name, context_inode);
+ inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb,
+ name, context_inode);
if (IS_ERR(inode)) {
file = ERR_CAST(inode);
goto err;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 8043943cdf6a..ddfacad0f70c 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -863,9 +863,7 @@ struct bch_fs {
DARRAY(enum bcachefs_metadata_version)
incompat_versions_requested;
-#ifdef CONFIG_UNICODE
struct unicode_map *cf_encoding;
-#endif
struct bch_sb_handle disk_sb;
@@ -1285,4 +1283,13 @@ static inline bool bch2_discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca
: ca->mi.discard;
}
+static inline bool bch2_fs_casefold_enabled(struct bch_fs *c)
+{
+#ifdef CONFIG_UNICODE
+ return !c->opts.casefold_disabled;
+#else
+ return false;
+#endif
+}
+
#endif /* _BCACHEFS_H */
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 08b22bddd747..e874a4357f64 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1337,15 +1337,42 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_node_reset_sib_u64s(b);
- scoped_guard(rcu)
- bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
- struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
-
- if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
- set_btree_node_need_rewrite(b);
- set_btree_node_need_rewrite_degraded(b);
+ /*
+ * XXX:
+ *
+ * We deadlock if too many btree updates require node rewrites while
+ * we're still in journal replay.
+ *
+ * This is because btree node rewrites generate more updates for the
+ * interior updates (alloc, backpointers), and if those updates touch
+ * new nodes and generate more rewrites - well, you see the problem.
+ *
+ * The biggest cause is that we don't use the btree write buffer (for
+ * the backpointer updates) - this needs some real thought on locking in
+ * order to fix.
+ *
+ * The problem with this workaround (not doing the rewrite for degraded
+ * nodes in journal replay) is that those degraded nodes persist, and we
+ * don't want that (this is a real bug when a btree node write completes
+ * with fewer replicas than we wanted and leaves a degraded node due to
+ * device _removal_, i.e. the device went away mid write).
+ *
+ * It's less of a bug here, but still a problem because we don't yet
+ * have a way of tracking degraded data - we need another index (all
+ * extents/btree nodes, by replicas entry) in order to fix properly
+ * (re-replicate degraded data at the earliest possible time).
+ */
+ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) {
+ scoped_guard(rcu)
+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
+ struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
+
+ if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
+ set_btree_node_need_rewrite(b);
+ set_btree_node_need_rewrite_degraded(b);
+ }
}
- }
+ }
if (!ptr_written) {
set_btree_node_need_rewrite(b);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 352f9cd2634f..f8829b667ad3 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2189,7 +2189,7 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans,
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
bch2_btree_journal_peek_prev(trans, iter, search_key,
- k->k ? k->k->p : path_l(path)->b->key.k.p);
+ k->k ? k->k->p : path_l(path)->b->data->min_key);
if (next_journal) {
iter->k = next_journal->k;
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 300f7cc8abdf..a18d0f78704d 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -18,7 +18,9 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
{
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
-#ifdef CONFIG_UNICODE
+ if (!bch2_fs_casefold_enabled(trans->c))
+ return -EOPNOTSUPP;
+
unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
int ret = PTR_ERR_OR_ZERO(buf);
if (ret)
@@ -30,9 +32,6 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
*out_cf = (struct qstr) QSTR_INIT(buf, ret);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
@@ -231,7 +230,8 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
}
-int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
+int bch2_dirent_init_name(struct bch_fs *c,
+ struct bkey_i_dirent *dirent,
const struct bch_hash_info *hash_info,
const struct qstr *name,
const struct qstr *cf_name)
@@ -251,7 +251,9 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
offsetof(struct bch_dirent, d_name) -
name->len);
} else {
-#ifdef CONFIG_UNICODE
+ if (!bch2_fs_casefold_enabled(c))
+ return -EOPNOTSUPP;
+
memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len];
@@ -277,9 +279,6 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len);
EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len);
-#else
- return -EOPNOTSUPP;
-#endif
}
unsigned u64s = dirent_val_u64s(name->len, cf_len);
@@ -313,7 +312,7 @@ struct bkey_i_dirent *bch2_dirent_create_key(struct btree_trans *trans,
dirent->v.d_type = type;
dirent->v.d_unused = 0;
- int ret = bch2_dirent_init_name(dirent, hash_info, name, cf_name);
+ int ret = bch2_dirent_init_name(trans->c, dirent, hash_info, name, cf_name);
if (ret)
return ERR_PTR(ret);
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index 70fb0b581221..1e17199cc5c7 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -59,7 +59,8 @@ static inline void dirent_copy_target(struct bkey_i_dirent *dst,
dst->v.d_type = src.v->d_type;
}
-int bch2_dirent_init_name(struct bkey_i_dirent *,
+int bch2_dirent_init_name(struct bch_fs *,
+ struct bkey_i_dirent *,
const struct bch_hash_info *,
const struct qstr *,
const struct qstr *);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index db24a76563f8..e54e4f255b22 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -722,7 +722,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
if (IS_ERR(inode))
inode = NULL;
-#ifdef CONFIG_UNICODE
if (!inode && IS_CASEFOLDED(vdir)) {
/*
* Do not cache a negative dentry in casefolded directories
@@ -737,7 +736,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
*/
return NULL;
}
-#endif
return d_splice_alias(&inode->v, dentry);
}
@@ -2566,9 +2564,10 @@ got_sb:
sb->s_shrink->seeks = 0;
#ifdef CONFIG_UNICODE
- sb->s_encoding = c->cf_encoding;
-#endif
+ if (bch2_fs_casefold_enabled(c))
+ sb->s_encoding = c->cf_encoding;
generic_set_sb_d_ops(sb);
+#endif
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
ret = PTR_ERR_OR_ZERO(vinode);
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 9920f1affc5b..dbf161e4311a 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -2302,9 +2302,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false;
-#ifdef CONFIG_UNICODE
hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL;
-#endif
ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info,
iter, k, need_second_pass);
@@ -2819,7 +2817,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
ret = remove_backpointer(trans, &inode);
bch_err_msg(c, ret, "removing dirent");
if (ret)
- break;
+ goto out;
ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 53e5dc1f6ac1..ef4cc7395b86 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1265,7 +1265,14 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
{
struct bch_fs *c = trans->c;
-#ifdef CONFIG_UNICODE
+#ifndef CONFIG_UNICODE
+ bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
+ return -EOPNOTSUPP;
+#endif
+
+ if (c->opts.casefold_disabled)
+ return -EOPNOTSUPP;
+
int ret = 0;
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
@@ -1289,10 +1296,6 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
bi->bi_fields_set |= BIT(Inode_opt_casefold);
return bch2_maybe_propagate_has_case_insensitive(trans, inum, bi);
-#else
- bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
- return -EOPNOTSUPP;
-#endif
}
static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index b0a76bd6d6f5..63f8e254495c 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -234,6 +234,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_CASEFOLD, false, \
NULL, "Dirent lookups are casefolded") \
+ x(casefold_disabled, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Disable casefolding filesystem wide") \
x(inodes_32bit, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 0641fb634bd4..d154b7651d28 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -314,7 +314,7 @@ enum bch_fsck_flags {
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
x(accounting_replicas_not_marked, 273, 0) \
x(accounting_to_invalid_device, 289, 0) \
- x(invalid_btree_id, 274, 0) \
+ x(invalid_btree_id, 274, FSCK_AUTOFIX) \
x(alloc_key_io_time_bad, 275, 0) \
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c
index 71b735a85026..3e9f59226bdf 100644
--- a/fs/bcachefs/str_hash.c
+++ b/fs/bcachefs/str_hash.c
@@ -38,6 +38,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans,
struct bkey_s_c_dirent old,
bool *updated_before_k_pos)
{
+ struct bch_fs *c = trans->c;
struct qstr old_name = bch2_dirent_get_name(old);
struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64));
int ret = PTR_ERR_OR_ZERO(new);
@@ -60,7 +61,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans,
sprintf(renamed_buf, "%.*s.fsck_renamed-%u",
old_name.len, old_name.name, i));
- ret = bch2_dirent_init_name(new, hash_info, &renamed_name, NULL);
+ ret = bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL);
if (ret)
return ret;
@@ -79,7 +80,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans,
}
ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
- bch_err_fn(trans->c, ret);
+ bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index 79d51aef70aa..8979ac2d7a3b 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -48,9 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
struct bch_hash_info info = {
.inum_snapshot = bi->bi_snapshot,
.type = INODE_STR_HASH(bi),
-#ifdef CONFIG_UNICODE
.cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
-#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 69c097ff54e7..c46b1053a02c 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1025,15 +1025,17 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
}
#ifdef CONFIG_UNICODE
- /* Default encoding until we can potentially have more as an option. */
- c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
- if (IS_ERR(c->cf_encoding)) {
- printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
- ret = -EINVAL;
- goto err;
+ if (bch2_fs_casefold_enabled(c)) {
+ /* Default encoding until we can potentially have more as an option. */
+ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
+ if (IS_ERR(c->cf_encoding)) {
+ printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+ ret = -EINVAL;
+ goto err;
+ }
}
#else
if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
@@ -1160,12 +1162,11 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
-#ifdef CONFIG_UNICODE
- bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
-#endif
+ if (c->cf_encoding)
+ bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
if (!bch2_fs_may_start(c))
return bch_err_throw(c, insufficient_devices_to_start);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 9de356bcb411..aa176cc9a324 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -83,6 +83,8 @@ enum btrfs_block_group_flags {
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
/* Does the block group need to be added to the free space tree? */
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
+ /* Set after we add a new block group to the free space tree. */
+ BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
/* Indicate that the block group is placed on a sequential zone */
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
/*
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index a3e2a2a81461..a83c268f7f87 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1241,6 +1241,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
{
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
+ struct rb_node *node;
int nr;
int ret;
@@ -1269,6 +1270,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
+ node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *bg;
+
+ bg = rb_entry(node, struct btrfs_block_group, cache_node);
+ clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
+ node = rb_next(node);
+ cond_resched();
+ }
+
return 0;
}
@@ -1358,12 +1369,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
+
+ if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
+ &block_group->runtime_flags))
+ goto next;
+
ret = populate_free_space_tree(trans, block_group);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
+next:
if (btrfs_should_end_transaction(trans)) {
btrfs_end_transaction(trans);
trans = btrfs_start_transaction(free_space_root, 1);
@@ -1390,6 +1407,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
+ /*
+ * While rebuilding the free space tree we may allocate new metadata
+ * block groups while modifying the free space tree.
+ *
+ * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
+ * can use multiple transactions, every time btrfs_end_transaction() is
+ * called at btrfs_rebuild_free_space_tree() we finish the creation of
+ * new block groups by calling btrfs_create_pending_block_groups(), and
+ * that in turn calls us, through add_block_group_free_space(), to add
+ * a free space info item and a free space extent item for the block
+ * group.
+ *
+ * Then later btrfs_rebuild_free_space_tree() may find such new block
+ * groups and process them with populate_free_space_tree(), which can
+ * fail with EEXIST since there are already items for the block group in
+ * the free space tree. Notice that we say "may find" because a new
+ * block group may be added to the block groups rbtree in a node before
+ * or after the block group currently being processed by the rebuild
+ * process. So signal the rebuild process to skip such new block groups
+ * if it finds them.
+ */
+ set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
+
ret = add_new_free_space_info(trans, block_group, path);
if (ret)
return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 26d6ed170a19..fc66872b4c74 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4710,7 +4710,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
int ret = 0;
struct btrfs_trans_handle *trans;
- u64 last_unlink_trans;
struct fscrypt_name fname;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
@@ -4736,6 +4735,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
goto out_notrans;
}
+ /*
+ * Propagate the last_unlink_trans value of the deleted dir to its
+ * parent directory. This is to prevent an unrecoverable log tree in the
+ * case we do something like this:
+ * 1) create dir foo
+ * 2) create snapshot under dir foo
+ * 3) delete the snapshot
+ * 4) rmdir foo
+ * 5) mkdir foo
+ * 6) fsync foo or some file inside foo
+ *
+ * This is because we can't unlink other roots when replaying the dir
+ * deletes for directory foo.
+ */
+ if (BTRFS_I(inode)->last_unlink_trans >= trans->transid)
+ btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
+
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry);
goto out;
@@ -4745,27 +4761,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (ret)
goto out;
- last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
-
/* now the directory is empty */
ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
&fname.disk_name);
- if (!ret) {
+ if (!ret)
btrfs_i_size_write(BTRFS_I(inode), 0);
- /*
- * Propagate the last_unlink_trans value of the deleted dir to
- * its parent directory. This is to prevent an unrecoverable
- * log tree in the case we do something like this:
- * 1) create dir foo
- * 2) create snapshot under dir foo
- * 3) delete the snapshot
- * 4) rmdir foo
- * 5) mkdir foo
- * 6) fsync foo or some file inside foo
- */
- if (last_unlink_trans >= trans->transid)
- BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
- }
out:
btrfs_end_transaction(trans);
out_notrans:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4eda35bdba71..8a60983a697c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
goto out;
}
+ btrfs_record_new_subvolume(trans, BTRFS_I(dir));
+
ret = btrfs_create_new_inode(trans, &new_inode_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
- btrfs_record_new_subvolume(trans, BTRFS_I(dir));
-
d_instantiate_new(dentry, new_inode_args.inode);
new_inode_args.inode = NULL;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 858b609e292c..cea8a7e9d6d3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r
unsigned int nofs_flag;
struct btrfs_inode *inode;
+ /* Only meant to be called for subvolume roots and not for log roots. */
+ ASSERT(is_fstree(btrfs_root_id(root)));
+
/*
* We're holding a transaction handle whether we are logging or
* replaying a log tree, so we must make sure NOFS semantics apply
@@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
return 0;
}
-/*
- * simple helper to read an inode off the disk from a given root
- * This can only be called for subvolume roots and not for the log
- */
-static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root,
- u64 objectid)
-{
- struct btrfs_inode *inode;
-
- inode = btrfs_iget_logging(objectid, root);
- if (IS_ERR(inode))
- return NULL;
- return inode;
-}
-
/* replays a single extent in 'eb' at 'slot' with 'key' into the
* subvolume 'root'. path is released on entry and should be released
* on exit.
@@ -674,9 +662,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
return -EUCLEAN;
}
- inode = read_one_inode(root, key->objectid);
- if (!inode)
- return -EIO;
+ inode = btrfs_iget_logging(key->objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
/*
* first check to see if we already have this extent in the
@@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(location.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -1073,7 +1062,9 @@ again:
search_key.type = BTRFS_INODE_REF_KEY;
search_key.offset = parent_objectid;
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
- if (ret == 0) {
+ if (ret < 0) {
+ return ret;
+ } else if (ret == 0) {
struct btrfs_inode_ref *victim_ref;
unsigned long ptr;
unsigned long ptr_end;
@@ -1146,13 +1137,13 @@ again:
struct fscrypt_str victim_name;
extref = (struct btrfs_inode_extref *)(base + cur_offset);
+ victim_name.len = btrfs_inode_extref_name_len(leaf, extref);
if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
goto next;
ret = read_alloc_one_name(leaf, &extref->name,
- btrfs_inode_extref_name_len(leaf, extref),
- &victim_name);
+ victim_name.len, &victim_name);
if (ret)
return ret;
@@ -1167,10 +1158,10 @@ again:
kfree(victim_name.name);
return ret;
} else if (!ret) {
- ret = -ENOENT;
- victim_parent = read_one_inode(root,
- parent_objectid);
- if (victim_parent) {
+ victim_parent = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(victim_parent)) {
+ ret = PTR_ERR(victim_parent);
+ } else {
inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
@@ -1315,9 +1306,9 @@ again:
struct btrfs_inode *dir;
btrfs_release_path(path);
- dir = read_one_inode(root, parent_id);
- if (!dir) {
- ret = -ENOENT;
+ dir = btrfs_iget_logging(parent_id, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
kfree(name.name);
goto out;
}
@@ -1389,15 +1380,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* copy the back ref in. The link count fixup code will take
* care of the rest
*/
- dir = read_one_inode(root, parent_objectid);
- if (!dir) {
- ret = -ENOENT;
+ dir = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ dir = NULL;
goto out;
}
- inode = read_one_inode(root, inode_objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(inode_objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -1409,11 +1402,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* parent object can change from one array
* item to another.
*/
- if (!dir)
- dir = read_one_inode(root, parent_objectid);
if (!dir) {
- ret = -ENOENT;
- goto out;
+ dir = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ dir = NULL;
+ goto out;
+ }
}
} else {
ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
@@ -1682,9 +1677,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
break;
btrfs_release_path(path);
- inode = read_one_inode(root, key.offset);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(key.offset, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
break;
}
@@ -1720,9 +1715,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode;
struct inode *vfs_inode;
- inode = read_one_inode(root, objectid);
- if (!inode)
- return -EIO;
+ inode = btrfs_iget_logging(objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
vfs_inode = &inode->vfs_inode;
key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
@@ -1761,14 +1756,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir;
int ret;
- inode = read_one_inode(root, location->objectid);
- if (!inode)
- return -ENOENT;
+ inode = btrfs_iget_logging(location->objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
- dir = read_one_inode(root, dirid);
- if (!dir) {
+ dir = btrfs_iget_logging(dirid, root);
+ if (IS_ERR(dir)) {
iput(&inode->vfs_inode);
- return -EIO;
+ return PTR_ERR(dir);
}
ret = btrfs_add_link(trans, dir, inode, name, 1, index);
@@ -1845,9 +1840,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
bool update_size = true;
bool name_added = false;
- dir = read_one_inode(root, key->objectid);
- if (!dir)
- return -EIO;
+ dir = btrfs_iget_logging(key->objectid, root);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
if (ret)
@@ -2147,9 +2142,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(location.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -2301,14 +2297,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
if (!log_path)
return -ENOMEM;
- dir = read_one_inode(root, dirid);
- /* it isn't an error if the inode isn't there, that can happen
- * because we replay the deletes before we copy in the inode item
- * from the log
+ dir = btrfs_iget_logging(dirid, root);
+ /*
+ * It isn't an error if the inode isn't there, that can happen because
+ * we replay the deletes before we copy in the inode item from the log.
*/
- if (!dir) {
+ if (IS_ERR(dir)) {
btrfs_free_path(log_path);
- return 0;
+ ret = PTR_ERR(dir);
+ if (ret == -ENOENT)
+ ret = 0;
+ return ret;
}
range_start = 0;
@@ -2467,9 +2466,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct btrfs_inode *inode;
u64 from;
- inode = read_one_inode(root, key.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(key.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
break;
}
from = ALIGN(i_size_read(&inode->vfs_inode),
@@ -7448,6 +7447,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
* full log sync.
* Also we don't need to worry with renames, since btrfs_rename() marks the log
* for full commit when renaming a subvolume.
+ *
+ * Must be called before creating the subvolume entry in its parent directory.
*/
void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
struct btrfs_inode *dir)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d4dbffdedd08..a97a771a459c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -137,13 +137,7 @@ struct epitem {
};
/* List header used to link this structure to the eventpoll ready list */
- struct list_head rdllink;
-
- /*
- * Works together "struct eventpoll"->ovflist in keeping the
- * single linked chain of items.
- */
- struct epitem *next;
+ struct llist_node rdllink;
/* The file descriptor information this item refers to */
struct epoll_filefd ffd;
@@ -191,22 +185,15 @@ struct eventpoll {
/* Wait queue used by file->poll() */
wait_queue_head_t poll_wait;
- /* List of ready file descriptors */
- struct list_head rdllist;
-
- /* Lock which protects rdllist and ovflist */
- rwlock_t lock;
+ /*
+ * List of ready file descriptors. Adding to this list is lockless. Items can be removed
+ * only with eventpoll::mtx held.
+ */
+ struct llist_head rdllist;
/* RB tree root used to store monitored fd structs */
struct rb_root_cached rbr;
- /*
- * This is a single linked list that chains all the "struct epitem" that
- * happened while transferring ready events to userspace w/out
- * holding ->lock.
- */
- struct epitem *ovflist;
-
/* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */
struct wakeup_source *ws;
@@ -361,10 +348,14 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
(p1->file < p2->file ? -1 : p1->fd - p2->fd));
}
-/* Tells us if the item is currently linked */
-static inline int ep_is_linked(struct epitem *epi)
+/*
+ * Add the item to its container eventpoll's rdllist; do nothing if the item is already on rdllist.
+ */
+static void epitem_ready(struct epitem *epi)
{
- return !list_empty(&epi->rdllink);
+ if (&epi->rdllink == cmpxchg(&epi->rdllink.next, &epi->rdllink, NULL))
+ llist_add(&epi->rdllink, &epi->ep->rdllist);
+
}
static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
@@ -383,13 +374,26 @@ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p)
*
* @ep: Pointer to the eventpoll context.
*
- * Return: a value different than %zero if ready events are available,
- * or %zero otherwise.
+ * Return: true if ready events might be available, false otherwise.
*/
-static inline int ep_events_available(struct eventpoll *ep)
+static inline bool ep_events_available(struct eventpoll *ep)
{
- return !list_empty_careful(&ep->rdllist) ||
- READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
+ bool available;
+ int locked;
+
+ locked = mutex_trylock(&ep->mtx);
+ if (!locked) {
+ /*
+ * The lock is held, and the holder might have removed all items while inspecting it. The
+ * llist_empty() check in this case is futile. Assume that something is enqueued and
+ * let ep_try_send_events() figure it out.
+ */
+ return true;
+ }
+
+ available = !llist_empty(&ep->rdllist);
+ mutex_unlock(&ep->mtx);
+ return available;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -724,77 +728,6 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
rcu_read_unlock();
}
-
-/*
- * ep->mutex needs to be held because we could be hit by
- * eventpoll_release_file() and epoll_ctl().
- */
-static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist)
-{
- /*
- * Steal the ready list, and re-init the original one to the
- * empty list. Also, set ep->ovflist to NULL so that events
- * happening while looping w/out locks, are not lost. We cannot
- * have the poll callback to queue directly on ep->rdllist,
- * because we want the "sproc" callback to be able to do it
- * in a lockless way.
- */
- lockdep_assert_irqs_enabled();
- write_lock_irq(&ep->lock);
- list_splice_init(&ep->rdllist, txlist);
- WRITE_ONCE(ep->ovflist, NULL);
- write_unlock_irq(&ep->lock);
-}
-
-static void ep_done_scan(struct eventpoll *ep,
- struct list_head *txlist)
-{
- struct epitem *epi, *nepi;
-
- write_lock_irq(&ep->lock);
- /*
- * During the time we spent inside the "sproc" callback, some
- * other events might have been queued by the poll callback.
- * We re-insert them inside the main ready-list here.
- */
- for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
- nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
- /*
- * We need to check if the item is already in the list.
- * During the "sproc" callback execution time, items are
- * queued into ->ovflist but the "txlist" might already
- * contain them, and the list_splice() below takes care of them.
- */
- if (!ep_is_linked(epi)) {
- /*
- * ->ovflist is LIFO, so we have to reverse it in order
- * to keep in FIFO.
- */
- list_add(&epi->rdllink, &ep->rdllist);
- ep_pm_stay_awake(epi);
- }
- }
- /*
- * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
- * releasing the lock, events will be queued in the normal way inside
- * ep->rdllist.
- */
- WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
-
- /*
- * Quickly re-inject items left on "txlist".
- */
- list_splice(txlist, &ep->rdllist);
- __pm_relax(ep->ws);
-
- if (!list_empty(&ep->rdllist)) {
- if (waitqueue_active(&ep->wq))
- wake_up(&ep->wq);
- }
-
- write_unlock_irq(&ep->lock);
-}
-
static void ep_get(struct eventpoll *ep)
{
refcount_inc(&ep->refcount);
@@ -832,10 +765,12 @@ static void ep_free(struct eventpoll *ep)
static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
{
struct file *file = epi->ffd.file;
+ struct llist_node *put_back_last;
struct epitems_head *to_free;
struct hlist_head *head;
+ LLIST_HEAD(put_back);
- lockdep_assert_irqs_enabled();
+ lockdep_assert_held(&ep->mtx);
/*
* Removes poll wait queue hooks.
@@ -867,10 +802,20 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
rb_erase_cached(&epi->rbn, &ep->rbr);
- write_lock_irq(&ep->lock);
- if (ep_is_linked(epi))
- list_del_init(&epi->rdllink);
- write_unlock_irq(&ep->lock);
+ if (llist_on_list(&epi->rdllink)) {
+ put_back_last = NULL;
+ while (true) {
+ struct llist_node *n = llist_del_first(&ep->rdllist);
+
+ if (&epi->rdllink == n || WARN_ON(!n))
+ break;
+ if (!put_back_last)
+ put_back_last = n;
+ __llist_add(n, &put_back);
+ }
+ if (put_back_last)
+ llist_add_batch(put_back.first, put_back_last, &ep->rdllist);
+ }
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@@ -974,8 +919,9 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth
static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth)
{
struct eventpoll *ep = file->private_data;
- LIST_HEAD(txlist);
- struct epitem *epi, *tmp;
+ struct wakeup_source *ws;
+ struct llist_node *n;
+ struct epitem *epi;
poll_table pt;
__poll_t res = 0;
@@ -989,22 +935,39 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
* the ready list.
*/
mutex_lock_nested(&ep->mtx, depth);
- ep_start_scan(ep, &txlist);
- list_for_each_entry_safe(epi, tmp, &txlist, rdllink) {
+ while (true) {
+ n = llist_del_first_init(&ep->rdllist);
+ if (!n)
+ break;
+
+ epi = llist_entry(n, struct epitem, rdllink);
+
if (ep_item_poll(epi, &pt, depth + 1)) {
res = EPOLLIN | EPOLLRDNORM;
+ epitem_ready(epi);
break;
} else {
/*
- * Item has been dropped into the ready list by the poll
- * callback, but it's not actually ready, as far as
- * caller requested events goes. We can remove it here.
+ * We need to activate ep before deactivating epi, to prevent autosuspend
+ * just in case epi becomes active after ep_item_poll() above.
+ *
+ * This is similar to ep_send_events().
*/
+ ws = ep_wakeup_source(epi);
+ if (ws) {
+ if (ws->active)
+ __pm_stay_awake(ep->ws);
+ __pm_relax(ws);
+ }
__pm_relax(ep_wakeup_source(epi));
- list_del_init(&epi->rdllink);
+
+ /* Just in case epi becomes active right before __pm_relax() */
+ if (unlikely(ep_item_poll(epi, &pt, depth + 1)))
+ ep_pm_stay_awake(epi);
+
+ __pm_relax(ep->ws);
}
}
- ep_done_scan(ep, &txlist);
mutex_unlock(&ep->mtx);
return res;
}
@@ -1153,12 +1116,10 @@ static int ep_alloc(struct eventpoll **pep)
return -ENOMEM;
mutex_init(&ep->mtx);
- rwlock_init(&ep->lock);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
- INIT_LIST_HEAD(&ep->rdllist);
+ init_llist_head(&ep->rdllist);
ep->rbr = RB_ROOT_CACHED;
- ep->ovflist = EP_UNACTIVE_PTR;
ep->user = get_current_user();
refcount_set(&ep->refcount, 1);
@@ -1241,93 +1202,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
#endif /* CONFIG_KCMP */
/*
- * Adds a new entry to the tail of the list in a lockless way, i.e.
- * multiple CPUs are allowed to call this function concurrently.
- *
- * Beware: it is necessary to prevent any other modifications of the
- * existing list until all changes are completed, in other words
- * concurrent list_add_tail_lockless() calls should be protected
- * with a read lock, where write lock acts as a barrier which
- * makes sure all list_add_tail_lockless() calls are fully
- * completed.
- *
- * Also an element can be locklessly added to the list only in one
- * direction i.e. either to the tail or to the head, otherwise
- * concurrent access will corrupt the list.
- *
- * Return: %false if element has been already added to the list, %true
- * otherwise.
- */
-static inline bool list_add_tail_lockless(struct list_head *new,
- struct list_head *head)
-{
- struct list_head *prev;
-
- /*
- * This is simple 'new->next = head' operation, but cmpxchg()
- * is used in order to detect that same element has been just
- * added to the list from another CPU: the winner observes
- * new->next == new.
- */
- if (!try_cmpxchg(&new->next, &new, head))
- return false;
-
- /*
- * Initially ->next of a new element must be updated with the head
- * (we are inserting to the tail) and only then pointers are atomically
- * exchanged. XCHG guarantees memory ordering, thus ->next should be
- * updated before pointers are actually swapped and pointers are
- * swapped before prev->next is updated.
- */
-
- prev = xchg(&head->prev, new);
-
- /*
- * It is safe to modify prev->next and new->prev, because a new element
- * is added only to the tail and new->next is updated before XCHG.
- */
-
- prev->next = new;
- new->prev = prev;
-
- return true;
-}
-
-/*
- * Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
- * i.e. multiple CPUs are allowed to call this function concurrently.
- *
- * Return: %false if epi element has been already chained, %true otherwise.
- */
-static inline bool chain_epi_lockless(struct epitem *epi)
-{
- struct eventpoll *ep = epi->ep;
-
- /* Fast preliminary check */
- if (epi->next != EP_UNACTIVE_PTR)
- return false;
-
- /* Check that the same epi has not been just chained from another CPU */
- if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
- return false;
-
- /* Atomically exchange tail */
- epi->next = xchg(&ep->ovflist, epi);
-
- return true;
-}
-
-/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
* have events to report.
*
- * This callback takes a read lock in order not to contend with concurrent
- * events from another file descriptor, thus all modifications to ->rdllist
- * or ->ovflist are lockless. Read lock is paired with the write lock from
- * ep_start/done_scan(), which stops all list modifications and guarantees
- * that lists state is seen correctly.
- *
* Another thing worth to mention is that ep_poll_callback() can be called
* concurrently for the same @epi from different CPUs if poll table was inited
* with several wait queues entries. Plural wakeup from different CPUs of a
@@ -1337,15 +1215,11 @@ static inline bool chain_epi_lockless(struct epitem *epi)
*/
static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
- int pwake = 0;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
__poll_t pollflags = key_to_poll(key);
- unsigned long flags;
int ewake = 0;
- read_lock_irqsave(&ep->lock, flags);
-
ep_set_busy_poll_napi_id(epi);
/*
@@ -1355,7 +1229,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
- goto out_unlock;
+ goto out;
/*
* Check the events coming with the callback. At this stage, not
@@ -1364,22 +1238,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* test for "key" != NULL before the event match test.
*/
if (pollflags && !(pollflags & epi->event.events))
- goto out_unlock;
+ goto out;
- /*
- * If we are transferring events to userspace, we can hold no locks
- * (because we're accessing user memory, and because of linux f_op->poll()
- * semantics). All the events that happen during that period of time are
- * chained in ep->ovflist and requeued later on.
- */
- if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
- if (chain_epi_lockless(epi))
- ep_pm_stay_awake_rcu(epi);
- } else if (!ep_is_linked(epi)) {
- /* In the usual case, add event to ready list. */
- if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
- ep_pm_stay_awake_rcu(epi);
- }
+ ep_pm_stay_awake_rcu(epi);
+ epitem_ready(epi);
/*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
@@ -1408,15 +1270,9 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
wake_up(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
- pwake++;
-
-out_unlock:
- read_unlock_irqrestore(&ep->lock, flags);
-
- /* We have to call this outside the lock */
- if (pwake)
ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);
+out:
if (!(epi->event.events & EPOLLEXCLUSIVE))
ewake = 1;
@@ -1661,8 +1517,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
if (is_file_epoll(tfile))
tep = tfile->private_data;
- lockdep_assert_irqs_enabled();
-
if (unlikely(percpu_counter_compare(&ep->user->epoll_watches,
max_user_watches) >= 0))
return -ENOSPC;
@@ -1674,11 +1528,10 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
}
/* Item initialization follow here ... */
- INIT_LIST_HEAD(&epi->rdllink);
+ init_llist_node(&epi->rdllink);
epi->ep = ep;
ep_set_ffd(&epi->ffd, tfile, fd);
epi->event = *event;
- epi->next = EP_UNACTIVE_PTR;
if (tep)
mutex_lock_nested(&tep->mtx, 1);
@@ -1745,16 +1598,13 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
return -ENOMEM;
}
- /* We have to drop the new item inside our item list to keep track of it */
- write_lock_irq(&ep->lock);
-
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
/* If the file is already "ready" we drop it inside the ready list */
- if (revents && !ep_is_linked(epi)) {
- list_add_tail(&epi->rdllink, &ep->rdllist);
+ if (revents) {
ep_pm_stay_awake(epi);
+ epitem_ready(epi);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
@@ -1763,8 +1613,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
- write_unlock_irq(&ep->lock);
-
/* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(ep, NULL, 0);
@@ -1779,11 +1627,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
static int ep_modify(struct eventpoll *ep, struct epitem *epi,
const struct epoll_event *event)
{
- int pwake = 0;
poll_table pt;
- lockdep_assert_irqs_enabled();
-
init_poll_funcptr(&pt, NULL);
/*
@@ -1827,24 +1672,16 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- write_lock_irq(&ep->lock);
- if (!ep_is_linked(epi)) {
- list_add_tail(&epi->rdllink, &ep->rdllist);
- ep_pm_stay_awake(epi);
+ ep_pm_stay_awake(epi);
+ epitem_ready(epi);
- /* Notify waiting tasks that events are available */
- if (waitqueue_active(&ep->wq))
- wake_up(&ep->wq);
- if (waitqueue_active(&ep->poll_wait))
- pwake++;
- }
- write_unlock_irq(&ep->lock);
+ /* Notify waiting tasks that events are available */
+ if (waitqueue_active(&ep->wq))
+ wake_up(&ep->wq);
+ if (waitqueue_active(&ep->poll_wait))
+ ep_poll_safewake(ep, NULL, 0);
}
- /* We have to call this outside the lock */
- if (pwake)
- ep_poll_safewake(ep, NULL, 0);
-
return 0;
}
@@ -1852,7 +1689,7 @@ static int ep_send_events(struct eventpoll *ep,
struct epoll_event __user *events, int maxevents)
{
struct epitem *epi, *tmp;
- LIST_HEAD(txlist);
+ LLIST_HEAD(txlist);
poll_table pt;
int res = 0;
@@ -1867,19 +1704,18 @@ static int ep_send_events(struct eventpoll *ep,
init_poll_funcptr(&pt, NULL);
mutex_lock(&ep->mtx);
- ep_start_scan(ep, &txlist);
- /*
- * We can loop without lock because we are passed a task private list.
- * Items cannot vanish during the loop we are holding ep->mtx.
- */
- list_for_each_entry_safe(epi, tmp, &txlist, rdllink) {
+ while (res < maxevents) {
struct wakeup_source *ws;
+ struct llist_node *n;
__poll_t revents;
- if (res >= maxevents)
+ n = llist_del_first(&ep->rdllist);
+ if (!n)
break;
+ epi = llist_entry(n, struct epitem, rdllink);
+
/*
* Activate ep->ws before deactivating epi->ws to prevent
* triggering auto-suspend here (in case we reactive epi->ws
@@ -1896,21 +1732,30 @@ static int ep_send_events(struct eventpoll *ep,
__pm_relax(ws);
}
- list_del_init(&epi->rdllink);
-
/*
* If the event mask intersect the caller-requested one,
* deliver the event to userspace. Again, we are holding ep->mtx,
* so no operations coming from userspace can change the item.
*/
revents = ep_item_poll(epi, &pt, 1);
- if (!revents)
+ if (!revents) {
+ init_llist_node(n);
+
+ /*
+ * Just in case epi becomes ready after ep_item_poll() above, but before
+ * init_llist_node(). Make sure to add it to the ready list, otherwise an
+ * event may be lost.
+ */
+ if (unlikely(ep_item_poll(epi, &pt, 1))) {
+ ep_pm_stay_awake(epi);
+ epitem_ready(epi);
+ }
continue;
+ }
events = epoll_put_uevent(revents, epi->event.data, events);
if (!events) {
- list_add(&epi->rdllink, &txlist);
- ep_pm_stay_awake(epi);
+ llist_add(&epi->rdllink, &ep->rdllist);
if (!res)
res = -EFAULT;
break;
@@ -1918,25 +1763,31 @@ static int ep_send_events(struct eventpoll *ep,
res++;
if (epi->event.events & EPOLLONESHOT)
epi->event.events &= EP_PRIVATE_BITS;
- else if (!(epi->event.events & EPOLLET)) {
+ __llist_add(n, &txlist);
+ }
+
+ llist_for_each_entry_safe(epi, tmp, txlist.first, rdllink) {
+ init_llist_node(&epi->rdllink);
+
+ if (!(epi->event.events & EPOLLET)) {
/*
- * If this file has been added with Level
- * Trigger mode, we need to insert back inside
- * the ready list, so that the next call to
- * epoll_wait() will check again the events
- * availability. At this point, no one can insert
- * into ep->rdllist besides us. The epoll_ctl()
- * callers are locked out by
- * ep_send_events() holding "mtx" and the
- * poll callback will queue them in ep->ovflist.
+ * If this file has been added with Level Trigger mode, we need to insert
+ * back inside the ready list, so that the next call to epoll_wait() will
+ * check again the events availability.
*/
- list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
+ epitem_ready(epi);
}
}
- ep_done_scan(ep, &txlist);
+
+ __pm_relax(ep->ws);
mutex_unlock(&ep->mtx);
+ if (!llist_empty(&ep->rdllist)) {
+ if (waitqueue_active(&ep->wq))
+ wake_up(&ep->wq);
+ }
+
return res;
}
@@ -2029,8 +1880,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
- lockdep_assert_irqs_enabled();
-
if (timeout && (timeout->tv_sec | timeout->tv_nsec)) {
slack = select_estimate_accuracy(timeout);
to = &expires;
@@ -2090,54 +1939,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
init_wait(&wait);
wait.func = ep_autoremove_wake_function;
- write_lock_irq(&ep->lock);
- /*
- * Barrierless variant, waitqueue_active() is called under
- * the same lock on wakeup ep_poll_callback() side, so it
- * is safe to avoid an explicit barrier.
- */
- __set_current_state(TASK_INTERRUPTIBLE);
+ prepare_to_wait_exclusive(&ep->wq, &wait, TASK_INTERRUPTIBLE);
- /*
- * Do the final check under the lock. ep_start/done_scan()
- * plays with two lists (->rdllist and ->ovflist) and there
- * is always a race when both lists are empty for short
- * period of time although events are pending, so lock is
- * important.
- */
- eavail = ep_events_available(ep);
- if (!eavail)
- __add_wait_queue_exclusive(&ep->wq, &wait);
-
- write_unlock_irq(&ep->lock);
-
- if (!eavail)
+ if (!ep_events_available(ep))
timed_out = !ep_schedule_timeout(to) ||
!schedule_hrtimeout_range(to, slack,
HRTIMER_MODE_ABS);
- __set_current_state(TASK_RUNNING);
-
- /*
- * We were woken up, thus go and try to harvest some events.
- * If timed out and still on the wait queue, recheck eavail
- * carefully under lock, below.
- */
- eavail = 1;
- if (!list_empty_careful(&wait.entry)) {
- write_lock_irq(&ep->lock);
- /*
- * If the thread timed out and is not on the wait queue,
- * it means that the thread was woken up after its
- * timeout expired before it could reacquire the lock.
- * Thus, when wait.entry is empty, it needs to harvest
- * events.
- */
- if (timed_out)
- eavail = list_empty(&wait.entry);
- __remove_wait_queue(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
- }
+ finish_wait(&ep->wq, &wait);
+ eavail = ep_events_available(ep);
}
}
diff --git a/fs/exec.c b/fs/exec.c
index 1f5fdd2e096e..ba400aafd640 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -114,6 +114,9 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
bool path_noexec(const struct path *path)
{
+ /* If it's an anonymous inode make sure that we catch any shenanigans. */
+ VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) &&
+ !(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC));
return (path->mnt->mnt_flags & MNT_NOEXEC) ||
(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}
@@ -781,13 +784,15 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
if (IS_ERR(file))
return file;
+ if (path_noexec(&file->f_path))
+ return ERR_PTR(-EACCES);
+
/*
* In the past the regular type check was here. It moved to may_open() in
* 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
* an invariant that all non-regular files error out before we get here.
*/
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
- path_noexec(&file->f_path))
+ if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)))
return ERR_PTR(-EACCES);
err = exe_file_deny_write_access(file);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f102afc03359..47006d0753f1 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1147,7 +1147,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos,
- unsigned int max_pages)
+ unsigned int max_folios)
{
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
@@ -1157,12 +1157,11 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
int err = 0;
num = min(iov_iter_count(ii), fc->max_write);
- num = min(num, max_pages << PAGE_SHIFT);
ap->args.in_pages = true;
ap->descs[0].offset = offset;
- while (num) {
+ while (num && ap->num_folios < max_folios) {
size_t tmp;
struct folio *folio;
pgoff_t index = pos >> PAGE_SHIFT;
diff --git a/fs/libfs.c b/fs/libfs.c
index 9ea0ecc325a8..6f487fc6be34 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1649,12 +1649,10 @@ struct inode *alloc_anon_inode(struct super_block *s)
*/
inode->i_state = I_DIRTY;
/*
- * Historically anonymous inodes didn't have a type at all and
- * userspace has come to rely on this. Internally they're just
- * regular files but S_IFREG is masked off when reporting
- * information to userspace.
+ * Historically anonymous inodes don't have a type at all and
+ * userspace has come to rely on this.
*/
- inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
+ inode->i_mode = S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE | S_ANON_INODE;
diff --git a/fs/namei.c b/fs/namei.c
index f761cafaeaad..c26a7ee42184 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3480,7 +3480,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
return -EACCES;
break;
default:
- VFS_BUG_ON_INODE(1, inode);
+ VFS_BUG_ON_INODE(!IS_ANON_FILE(inode), inode);
}
error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 72a3e6db2524..f27ea5099a68 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -53,30 +53,40 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
* data written into the pagecache until we can find out from the server what
* the values actually are.
*/
-static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
- loff_t i_size, loff_t pos, size_t copied)
+void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
+ loff_t pos, size_t copied)
{
+ loff_t i_size, end = pos + copied;
blkcnt_t add;
size_t gap;
+ if (end <= i_size_read(inode))
+ return;
+
if (ctx->ops->update_i_size) {
- ctx->ops->update_i_size(inode, pos);
+ ctx->ops->update_i_size(inode, end);
return;
}
- i_size_write(inode, pos);
+ spin_lock(&inode->i_lock);
+
+ i_size = i_size_read(inode);
+ if (end > i_size) {
+ i_size_write(inode, end);
#if IS_ENABLED(CONFIG_FSCACHE)
- fscache_update_cookie(ctx->cache, NULL, &pos);
+ fscache_update_cookie(ctx->cache, NULL, &end);
#endif
- gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
- if (copied > gap) {
- add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
+ gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
+ if (copied > gap) {
+ add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
- inode->i_blocks = min_t(blkcnt_t,
- DIV_ROUND_UP(pos, SECTOR_SIZE),
- inode->i_blocks + add);
+ inode->i_blocks = min_t(blkcnt_t,
+ DIV_ROUND_UP(end, SECTOR_SIZE),
+ inode->i_blocks + add);
+ }
}
+ spin_unlock(&inode->i_lock);
}
/**
@@ -111,7 +121,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
struct folio *folio = NULL, *writethrough = NULL;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
- loff_t i_size, pos = iocb->ki_pos;
+ loff_t pos = iocb->ki_pos;
size_t max_chunk = mapping_max_folio_size(mapping);
bool maybe_trouble = false;
@@ -344,10 +354,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
flush_dcache_folio(folio);
/* Update the inode size if we moved the EOF marker */
+ netfs_update_i_size(ctx, inode, pos, copied);
pos += copied;
- i_size = i_size_read(inode);
- if (pos > i_size)
- netfs_update_i_size(ctx, inode, i_size, pos, copied);
written += copied;
if (likely(!wreq)) {
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index fa9a5bf3c6d5..a16660ab7f83 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -9,20 +9,6 @@
#include <linux/uio.h>
#include "internal.h"
-static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
-{
- struct inode *inode = wreq->inode;
- unsigned long long end = wreq->start + wreq->transferred;
-
- if (!wreq->error &&
- i_size_read(inode) < end) {
- if (wreq->netfs_ops->update_i_size)
- wreq->netfs_ops->update_i_size(inode, end);
- else
- i_size_write(inode, end);
- }
-}
-
/*
* Perform an unbuffered write where we may have to do an RMW operation on an
* encrypted file. This can also be used for direct I/O writes.
@@ -98,7 +84,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
if (async)
wreq->iocb = iocb;
wreq->len = iov_iter_count(&wreq->buffer.iter);
- wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
_debug("begin = %zd", ret);
@@ -106,7 +91,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
}
if (!async) {
- trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
ret = netfs_wait_for_write(wreq);
if (ret > 0)
iocb->ki_pos += ret;
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index e2ee9183392b..d4f16fefd965 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -28,6 +28,12 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
/*
+ * buffered_write.c
+ */
+void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
+ loff_t pos, size_t copied);
+
+/*
* main.c
*/
extern unsigned int netfs_debug;
@@ -267,14 +273,32 @@ static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq,
enum netfs_rreq_trace trace)
{
if (test_bit(rreq_flag, &rreq->flags)) {
- trace_netfs_rreq(rreq, trace);
clear_bit_unlock(rreq_flag, &rreq->flags);
smp_mb__after_atomic(); /* Set flag before task state */
+ trace_netfs_rreq(rreq, trace);
wake_up(&rreq->waitq);
}
}
/*
+ * Test the NETFS_RREQ_IN_PROGRESS flag, inserting an appropriate barrier.
+ */
+static inline bool netfs_check_rreq_in_progress(const struct netfs_io_request *rreq)
+{
+ /* Order read of flags before read of anything else, such as error. */
+ return test_bit_acquire(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+}
+
+/*
+ * Test the NETFS_SREQ_IN_PROGRESS flag, inserting an appropriate barrier.
+ */
+static inline bool netfs_check_subreq_in_progress(const struct netfs_io_subrequest *subreq)
+{
+ /* Order read of flags before read of anything else, such as error. */
+ return test_bit_acquire(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+}
+
+/*
* fscache-cache.c
*/
#ifdef CONFIG_PROC_FS
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 3db401d269e7..73da6c9f5777 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -58,15 +58,15 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)
if (v == &netfs_io_requests) {
seq_puts(m,
- "REQUEST OR REF FL ERR OPS COVERAGE\n"
- "======== == === == ==== === =========\n"
+ "REQUEST OR REF FLAG ERR OPS COVERAGE\n"
+ "======== == === ==== ==== === =========\n"
);
return 0;
}
rreq = list_entry(v, struct netfs_io_request, proc_link);
seq_printf(m,
- "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx",
+ "%08x %s %3d %4lx %4ld %3d @%04llx %llx/%llx",
rreq->debug_id,
netfs_origins[rreq->origin],
refcount_read(&rreq->ref),
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 43b67a28a8fa..20748bcfbf59 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -356,22 +356,22 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
DEFINE_WAIT(myself);
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ if (!netfs_check_subreq_in_progress(subreq))
continue;
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce);
for (;;) {
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ if (!netfs_check_subreq_in_progress(subreq))
break;
trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
}
+ trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce);
finish_wait(&rreq->waitq, &myself);
}
@@ -381,7 +381,12 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
static int netfs_collect_in_app(struct netfs_io_request *rreq,
bool (*collector)(struct netfs_io_request *rreq))
{
- bool need_collect = false, inactive = true;
+ bool need_collect = false, inactive = true, done = true;
+
+ if (!netfs_check_rreq_in_progress(rreq)) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_recollect);
+ return 1; /* Done */
+ }
for (int i = 0; i < NR_IO_STREAMS; i++) {
struct netfs_io_subrequest *subreq;
@@ -395,14 +400,16 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
struct netfs_io_subrequest,
rreq_link);
if (subreq &&
- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ (!netfs_check_subreq_in_progress(subreq) ||
test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
need_collect = true;
break;
}
+ if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
+ done = false;
}
- if (!need_collect && !inactive)
+ if (!need_collect && !inactive && !done)
return 0; /* Sleep */
__set_current_state(TASK_RUNNING);
@@ -423,14 +430,13 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
/*
* Wait for a request to complete, successfully or otherwise.
*/
-static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
- bool (*collector)(struct netfs_io_request *rreq))
+static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
{
DEFINE_WAIT(myself);
ssize_t ret;
for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
@@ -440,18 +446,22 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
+ if (!netfs_check_rreq_in_progress(rreq))
+ break;
+ cond_resched();
continue;
}
}
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ if (!netfs_check_rreq_in_progress(rreq))
break;
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
all_collected:
+ trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip);
finish_wait(&rreq->waitq, &myself);
ret = rreq->error;
@@ -478,12 +488,12 @@ all_collected:
ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
{
- return netfs_wait_for_request(rreq, netfs_read_collection);
+ return netfs_wait_for_in_progress(rreq, netfs_read_collection);
}
ssize_t netfs_wait_for_write(struct netfs_io_request *rreq)
{
- return netfs_wait_for_request(rreq, netfs_write_collection);
+ return netfs_wait_for_in_progress(rreq, netfs_write_collection);
}
/*
@@ -494,10 +504,8 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq,
{
DEFINE_WAIT(myself);
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
-
for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
@@ -507,19 +515,23 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
+ if (!netfs_check_rreq_in_progress(rreq) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+ cond_resched();
continue;
}
}
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ if (!netfs_check_rreq_in_progress(rreq) ||
!test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
break;
schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
all_collected:
+ trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause);
finish_wait(&rreq->waitq, &myself);
}
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 96ee18af28ef..3e804da1e1eb 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -218,7 +218,7 @@ reassess:
stream->collected_to = front->start;
}
- if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags))
+ if (netfs_check_subreq_in_progress(front))
notes |= HIT_PENDING;
smp_rmb(); /* Read counters after IN_PROGRESS flag. */
transferred = READ_ONCE(front->transferred);
@@ -293,7 +293,9 @@ reassess:
spin_lock(&rreq->lock);
remove = front;
- trace_netfs_sreq(front, netfs_sreq_trace_discard);
+ trace_netfs_sreq(front,
+ notes & ABANDON_SREQ ?
+ netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed);
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
@@ -353,9 +355,11 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
if (rreq->iocb) {
rreq->iocb->ki_pos += rreq->transferred;
- if (rreq->iocb->ki_complete)
+ if (rreq->iocb->ki_complete) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
rreq->iocb->ki_complete(
rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
+ }
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
@@ -379,9 +383,11 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
if (rreq->iocb) {
rreq->iocb->ki_pos += rreq->transferred;
- if (rreq->iocb->ki_complete)
+ if (rreq->iocb->ki_complete) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
rreq->iocb->ki_complete(
rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
+ }
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
@@ -445,7 +451,7 @@ void netfs_read_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_check_rreq_in_progress(rreq)) {
if (netfs_read_collection(rreq))
/* Drop the ref from the IN_PROGRESS flag. */
netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index e2b102ffb768..0f3a36852a4d 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -240,7 +240,7 @@ reassess_streams:
}
/* Stall if the front is still undergoing I/O. */
- if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
+ if (netfs_check_subreq_in_progress(front)) {
notes |= HIT_PENDING;
break;
}
@@ -393,8 +393,10 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
ictx->ops->invalidate_cache(wreq);
}
- if (wreq->cleanup)
- wreq->cleanup(wreq);
+ if ((wreq->origin == NETFS_UNBUFFERED_WRITE ||
+ wreq->origin == NETFS_DIO_WRITE) &&
+ !wreq->error)
+ netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
if (wreq->origin == NETFS_DIO_WRITE &&
wreq->mapping->nrpages) {
@@ -419,9 +421,11 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
if (wreq->iocb) {
size_t written = min(wreq->transferred, wreq->len);
wreq->iocb->ki_pos += written;
- if (wreq->iocb->ki_complete)
+ if (wreq->iocb->ki_complete) {
+ trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete);
wreq->iocb->ki_complete(
wreq->iocb, wreq->error ? wreq->error : written);
+ }
wreq->iocb = VFS_PTR_POISON;
}
@@ -434,7 +438,7 @@ void netfs_write_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_check_rreq_in_progress(rreq)) {
if (netfs_write_collection(rreq))
/* Drop the ref from the IN_PROGRESS flag. */
netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index 9d1d8a8bab72..fc9c3e0d34d8 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -146,14 +146,13 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
subreq = netfs_alloc_subrequest(wreq);
subreq->source = to->source;
subreq->start = start;
- subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
subreq->stream_nr = to->stream_nr;
subreq->retry_count = 1;
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_split);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index df4807460596..4bea008dbebd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
}
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
- switch (task->tk_status) {
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- case -NFS4ERR_SEQ_FALSE_RETRY:
- case -NFS4ERR_SEQ_MISORDERED:
+ switch (op_status) {
+ case NFS4_OK:
+ case NFS4ERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFS4ERR_BADSESSION:
+ case NFS4ERR_BADSLOT:
+ case NFS4ERR_BAD_HIGH_SLOT:
+ case NFS4ERR_DEADSESSION:
+ case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case NFS4ERR_SEQ_FALSE_RETRY:
+ case NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- break;
- case -NFS4ERR_DELAY:
+ goto out_retry;
+ case NFS4ERR_DELAY:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
fallthrough;
- case -NFS4ERR_GRACE:
+ case NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
- break;
- case -NFS4ERR_RETRY_UNCACHED_REP:
- break;
+ goto out_retry;
+ case NFS4ERR_RETRY_UNCACHED_REP:
+ goto out_retry;
/* Invalidate Layout errors */
- case -NFS4ERR_PNFS_NO_LAYOUT:
- case -ESTALE: /* mapped NFS4ERR_STALE */
- case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
- case -EISDIR: /* mapped NFS4ERR_ISDIR */
- case -NFS4ERR_FHEXPIRED:
- case -NFS4ERR_WRONG_TYPE:
+ case NFS4ERR_PNFS_NO_LAYOUT:
+ case NFS4ERR_STALE:
+ case NFS4ERR_BADHANDLE:
+ case NFS4ERR_ISDIR:
+ case NFS4ERR_FHEXPIRED:
+ case NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status);
/*
@@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
+ default:
+ break;
+ }
+
+ switch (task->tk_status) {
/* RPC connection errors */
case -ENETDOWN:
case -ENETUNREACH:
@@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
- fallthrough;
+ break;
default:
- if (ff_layout_avoid_mds_available_ds(lseg))
- return -NFS4ERR_RESET_TO_PNFS;
-reset:
- dprintk("%s Retry through MDS. Error %d\n", __func__,
- task->tk_status);
- return -NFS4ERR_RESET_TO_MDS;
+ break;
}
+
+ if (ff_layout_avoid_mds_available_ds(lseg))
+ return -NFS4ERR_RESET_TO_PNFS;
+reset:
+ dprintk("%s Retry through MDS. Error %d\n", __func__,
+ task->tk_status);
+ return -NFS4ERR_RESET_TO_MDS;
+
+out_retry:
task->tk_status = 0;
return -EAGAIN;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
+ u32 op_status,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+ switch (op_status) {
+ case NFS_OK:
+ case NFSERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFSERR_ACCES:
+ case NFSERR_BADHANDLE:
+ case NFSERR_FBIG:
+ case NFSERR_IO:
+ case NFSERR_NOSPC:
+ case NFSERR_ROFS:
+ case NFSERR_STALE:
+ goto out_reset_to_pnfs;
+ case NFSERR_JUKEBOX:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ goto out_retry;
+ default:
+ break;
+ }
+
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
case -EACCES:
@@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
}
+out_reset_to_pnfs:
/* FIXME: Need to prevent infinite looping here. */
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
@@ -1228,6 +1272,7 @@ out_retry:
}
static int ff_layout_async_handle_error(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) {
case 3:
- return ff_layout_async_handle_error_v3(task, clp, lseg, idx);
- case 4:
- return ff_layout_async_handle_error_v4(task, state, clp,
+ return ff_layout_async_handle_error_v3(task, op_status, clp,
lseg, idx);
+ case 4:
+ return ff_layout_async_handle_error_v4(task, op_status, state,
+ clp, lseg, idx);
default:
/* should never happen */
WARN_ON_ONCE(1);
@@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
switch (status) {
case NFS4ERR_DELAY:
case NFS4ERR_GRACE:
+ case NFS4ERR_PERM:
break;
case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx);
@@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
trace_ff_layout_read_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
trace_ff_layout_write_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
trace_ff_layout_commit_error(data, task->tk_status);
}
- err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
- data->lseg, data->ds_commit_index);
+ err = ff_layout_async_handle_error(task, data->res.op_status,
+ NULL, data->ds_clp, data->lseg,
+ data->ds_commit_index);
trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8ab7868807a7..a2fa6bc4d74e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2589,15 +2589,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
+ int err;
nfs_clients_init(net);
if (!rpc_proc_register(net, &nn->rpcstats)) {
- nfs_clients_exit(net);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_proc_rpc;
}
- return nfs_fs_proc_net_init(net);
+ err = nfs_fs_proc_net_init(net);
+ if (err)
+ goto err_proc_nfs;
+
+ return 0;
+
+err_proc_nfs:
+ rpc_proc_unregister(net, "nfs");
+err_proc_rpc:
+ nfs_clients_exit(net);
+ return err;
}
static void nfs_net_exit(struct net *net)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3adb7d0dbec7..1a7ec68bde15 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
{
if (atomic_dec_and_test(&lo->plh_outstanding) &&
- test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) {
+ smp_mb__after_atomic();
wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
+ }
}
static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 7216fcec79e8..75142f49d65d 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1334,7 +1334,12 @@ cifs_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted);
+ goto do_retry;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed);
+do_retry:
+ __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes)
/* reset bytes number since we can not check a sign */
@@ -1343,8 +1348,14 @@ cifs_readv_callback(struct mid_q_entry *mid)
task_io_account_read(rdata->got_bytes);
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
+ case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed);
+ rdata->result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown);
rdata->result = -EIO;
+ break;
}
if (rdata->result == -ENODATA) {
@@ -1713,10 +1724,21 @@ cifs_writev_callback(struct mid_q_entry *mid)
}
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
+ result = -EAGAIN;
+ break;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
+ case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed);
+ result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown);
result = -EIO;
break;
}
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index a717be1626a3..7f6186c2e60d 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4567,7 +4567,11 @@ smb2_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted);
+ goto do_retry;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed);
+do_retry:
__set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes)
@@ -4578,11 +4582,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed);
credits.value = le16_to_cpu(shdr->CreditRequest);
credits.instance = server->reconnect_instance;
- fallthrough;
+ rdata->result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown);
rdata->result = -EIO;
+ break;
}
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
@@ -4835,11 +4843,14 @@ smb2_writev_callback(struct mid_q_entry *mid)
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
credits.value = le16_to_cpu(rsp->hdr.CreditRequest);
credits.instance = server->reconnect_instance;
result = smb2_check_receive(mid, server, 0);
- if (result != 0)
+ if (result != 0) {
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_bad);
break;
+ }
written = le32_to_cpu(rsp->DataLength);
/*
@@ -4861,14 +4872,23 @@ smb2_writev_callback(struct mid_q_entry *mid)
}
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
+ result = -EAGAIN;
+ break;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed);
credits.value = le16_to_cpu(rsp->hdr.CreditRequest);
credits.instance = server->reconnect_instance;
- fallthrough;
+ result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown);
result = -EIO;
break;
}
@@ -4908,7 +4928,6 @@ smb2_writev_callback(struct mid_q_entry *mid)
server->credits, server->in_flight,
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
- trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
cifs_write_subrequest_terminated(wdata, result ?: written);
release_mid(mid);
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 7839efe050bf..000cc7f4a3ce 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -3444,16 +3444,41 @@ xfs_alloc_read_agf(
set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
}
+
#ifdef DEBUG
- else if (!xfs_is_shutdown(mp)) {
- ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
- ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
- ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
- ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
- ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level));
- ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level));
+ /*
+ * It's possible for the AGF to be out of sync if the block device is
+ * silently dropping writes. This can happen in fstests with dmflakey
+ * enabled, which allows the buffer to be cleaned and reclaimed by
+ * memory pressure and then re-read from disk here. We will get a
+ * stale version of the AGF from disk, and nothing good can happen from
+ * here. Hence if we detect this situation, immediately shut down the
+ * filesystem.
+ *
+ * This can also happen if we are already in the middle of a forced
+ * shutdown, so don't bother checking if we are already shut down.
+ */
+ if (!xfs_is_shutdown(pag_mount(pag))) {
+ bool ok = true;
+
+ ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks);
+ ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks);
+ ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks);
+ ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount);
+ ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest);
+ ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level);
+ ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level);
+
+ if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
+ xfs_trans_brelse(tp, agfbp);
+ xfs_force_shutdown(pag_mount(pag),
+ SHUTDOWN_CORRUPT_ONDISK);
+ return -EFSCORRUPTED;
+ }
}
-#endif
+#endif /* DEBUG */
+
if (agfbpp)
*agfbpp = agfbp;
else
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 0c47b5c6ca7d..750111634d9f 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi(
set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
}
+#ifdef DEBUG
/*
- * It's possible for these to be out of sync if
- * we are in the middle of a forced shutdown.
+ * It's possible for the AGI to be out of sync if the block device is
+ * silently dropping writes. This can happen in fstests with dmflakey
+ * enabled, which allows the buffer to be cleaned and reclaimed by
+ * memory pressure and then re-read from disk here. We will get a
+ * stale version of the AGI from disk, and nothing good can happen from
+ * here. Hence if we detect this situation, immediately shut down the
+ * filesystem.
+ *
+ * This can also happen if we are already in the middle of a forced
+ * shutdown, so don't bother checking if we are already shut down.
*/
- ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
- xfs_is_shutdown(pag_mount(pag)));
+ if (!xfs_is_shutdown(pag_mount(pag))) {
+ bool ok = true;
+
+ ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount);
+ ok &= pag->pagi_count == be32_to_cpu(agi->agi_count);
+
+ if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ xfs_trans_brelse(tp, agibp);
+ xfs_force_shutdown(pag_mount(pag),
+ SHUTDOWN_CORRUPT_ONDISK);
+ return -EFSCORRUPTED;
+ }
+ }
+#endif /* DEBUG */
+
if (agibpp)
*agibpp = agibp;
else
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8af83bd161f9..ba5bd6031ece 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2082,44 +2082,6 @@ xfs_buf_delwri_submit(
return error;
}
-/*
- * Push a single buffer on a delwri queue.
- *
- * The purpose of this function is to submit a single buffer of a delwri queue
- * and return with the buffer still on the original queue.
- *
- * The buffer locking and queue management logic between _delwri_pushbuf() and
- * _delwri_queue() guarantee that the buffer cannot be queued to another list
- * before returning.
- */
-int
-xfs_buf_delwri_pushbuf(
- struct xfs_buf *bp,
- struct list_head *buffer_list)
-{
- int error;
-
- ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-
- trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
-
- xfs_buf_lock(bp);
- bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
- bp->b_flags |= XBF_WRITE;
- xfs_buf_submit(bp);
-
- /*
- * The buffer is now locked, under I/O but still on the original delwri
- * queue. Wait for I/O completion, restore the DELWRI_Q flag and
- * return with the buffer unlocked and still on the original queue.
- */
- error = xfs_buf_iowait(bp);
- bp->b_flags |= _XBF_DELWRI_Q;
- xfs_buf_unlock(bp);
-
- return error;
-}
-
void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
/*
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 9d2ab567cf81..15fc56948346 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -326,7 +326,6 @@ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
-extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
{
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 90139e0f3271..7fc54725c5f6 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -32,6 +32,61 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_buf_log_item, bli_item);
}
+static void
+xfs_buf_item_get_format(
+ struct xfs_buf_log_item *bip,
+ int count)
+{
+ ASSERT(bip->bli_formats == NULL);
+ bip->bli_format_count = count;
+
+ if (count == 1) {
+ bip->bli_formats = &bip->__bli_format;
+ return;
+ }
+
+ bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format),
+ GFP_KERNEL | __GFP_NOFAIL);
+}
+
+static void
+xfs_buf_item_free_format(
+ struct xfs_buf_log_item *bip)
+{
+ if (bip->bli_formats != &bip->__bli_format) {
+ kfree(bip->bli_formats);
+ bip->bli_formats = NULL;
+ }
+}
+
+static void
+xfs_buf_item_free(
+ struct xfs_buf_log_item *bip)
+{
+ xfs_buf_item_free_format(bip);
+ kvfree(bip->bli_item.li_lv_shadow);
+ kmem_cache_free(xfs_buf_item_cache, bip);
+}
+
+/*
+ * xfs_buf_item_relse() is called when the buf log item is no longer needed.
+ */
+static void
+xfs_buf_item_relse(
+ struct xfs_buf_log_item *bip)
+{
+ struct xfs_buf *bp = bip->bli_buf;
+
+ trace_xfs_buf_item_relse(bp, _RET_IP_);
+
+ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
+ ASSERT(atomic_read(&bip->bli_refcount) == 0);
+
+ bp->b_log_item = NULL;
+ xfs_buf_rele(bp);
+ xfs_buf_item_free(bip);
+}
+
/* Is this log iovec plausibly large enough to contain the buffer log format? */
bool
xfs_buf_log_check_iovec(
@@ -390,6 +445,42 @@ xfs_buf_item_pin(
}
/*
+ * For a stale BLI, process all the necessary completions that must be
+ * performed when the final BLI reference goes away. The buffer will be
+ * referenced and locked here - we return to the caller with the buffer still
+ * referenced and locked for them to finalise processing of the buffer.
+ */
+static void
+xfs_buf_item_finish_stale(
+ struct xfs_buf_log_item *bip)
+{
+ struct xfs_buf *bp = bip->bli_buf;
+ struct xfs_log_item *lip = &bip->bli_item;
+
+ ASSERT(bip->bli_flags & XFS_BLI_STALE);
+ ASSERT(xfs_buf_islocked(bp));
+ ASSERT(bp->b_flags & XBF_STALE);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(list_empty(&lip->li_trans));
+ ASSERT(!bp->b_transp);
+
+ if (bip->bli_flags & XFS_BLI_STALE_INODE) {
+ xfs_buf_item_done(bp);
+ xfs_buf_inode_iodone(bp);
+ ASSERT(list_empty(&bp->b_li_list));
+ return;
+ }
+
+ /*
+ * We may or may not be on the AIL here, xfs_trans_ail_delete() will do
+ * the right thing regardless of the situation in which we are called.
+ */
+ xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
+ xfs_buf_item_relse(bip);
+ ASSERT(bp->b_log_item == NULL);
+}
+
+/*
* This is called to unpin the buffer associated with the buf log item which was
* previously pinned with a call to xfs_buf_item_pin(). We enter this function
* with a buffer pin count, a buffer reference and a BLI reference.
@@ -438,13 +529,6 @@ xfs_buf_item_unpin(
}
if (stale) {
- ASSERT(bip->bli_flags & XFS_BLI_STALE);
- ASSERT(xfs_buf_islocked(bp));
- ASSERT(bp->b_flags & XBF_STALE);
- ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
- ASSERT(list_empty(&lip->li_trans));
- ASSERT(!bp->b_transp);
-
trace_xfs_buf_item_unpin_stale(bip);
/*
@@ -455,22 +539,7 @@ xfs_buf_item_unpin(
* processing is complete.
*/
xfs_buf_rele(bp);
-
- /*
- * If we get called here because of an IO error, we may or may
- * not have the item on the AIL. xfs_trans_ail_delete() will
- * take care of that situation. xfs_trans_ail_delete() drops
- * the AIL lock.
- */
- if (bip->bli_flags & XFS_BLI_STALE_INODE) {
- xfs_buf_item_done(bp);
- xfs_buf_inode_iodone(bp);
- ASSERT(list_empty(&bp->b_li_list));
- } else {
- xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
- xfs_buf_item_relse(bp);
- ASSERT(bp->b_log_item == NULL);
- }
+ xfs_buf_item_finish_stale(bip);
xfs_buf_relse(bp);
return;
}
@@ -543,43 +612,42 @@ xfs_buf_item_push(
* Drop the buffer log item refcount and take appropriate action. This helper
* determines whether the bli must be freed or not, since a decrement to zero
* does not necessarily mean the bli is unused.
- *
- * Return true if the bli is freed, false otherwise.
*/
-bool
+void
xfs_buf_item_put(
struct xfs_buf_log_item *bip)
{
- struct xfs_log_item *lip = &bip->bli_item;
- bool aborted;
- bool dirty;
+
+ ASSERT(xfs_buf_islocked(bip->bli_buf));
/* drop the bli ref and return if it wasn't the last one */
if (!atomic_dec_and_test(&bip->bli_refcount))
- return false;
+ return;
- /*
- * We dropped the last ref and must free the item if clean or aborted.
- * If the bli is dirty and non-aborted, the buffer was clean in the
- * transaction but still awaiting writeback from previous changes. In
- * that case, the bli is freed on buffer writeback completion.
- */
- aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
- xlog_is_shutdown(lip->li_log);
- dirty = bip->bli_flags & XFS_BLI_DIRTY;
- if (dirty && !aborted)
- return false;
+ /* If the BLI is in the AIL, then it is still dirty and in use */
+ if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) {
+ ASSERT(bip->bli_flags & XFS_BLI_DIRTY);
+ return;
+ }
/*
- * The bli is aborted or clean. An aborted item may be in the AIL
- * regardless of dirty state. For example, consider an aborted
- * transaction that invalidated a dirty bli and cleared the dirty
- * state.
+ * In shutdown conditions, we can be asked to free a dirty BLI that
+ * isn't in the AIL. This can occur due to a checkpoint aborting a BLI
+ * instead of inserting it into the AIL at checkpoint IO completion. If
+ * there's another bli reference (e.g. a btree cursor holds a clean
+ * reference) and it is released via xfs_trans_brelse(), we can get here
+ * with that aborted, dirty BLI. In this case, it is safe to free the
+ * dirty BLI immediately, as it is not in the AIL and there are no
+ * other references to it.
+ *
+ * We should never get here with a stale BLI via that path as
+ * xfs_trans_brelse() specifically holds onto stale buffers rather than
+ * releasing them.
*/
- if (aborted)
- xfs_trans_ail_delete(lip, 0);
- xfs_buf_item_relse(bip->bli_buf);
- return true;
+ ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) ||
+ test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags));
+ ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
+ xfs_buf_item_relse(bip);
}
/*
@@ -600,6 +668,15 @@ xfs_buf_item_put(
* if necessary but do not unlock the buffer. This is for support of
* xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
* free the item.
+ *
+ * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must*
+ * perform a completion abort of any objects attached to the buffer for IO
+ * tracking purposes. This generally only happens in shutdown situations,
+ * normally xfs_buf_item_unpin() will drop the last BLI reference and perform
+ * completion processing. However, because transaction completion can race with
+ * checkpoint completion during a shutdown, this release context may end up
+ * being the last active reference to the BLI and so needs to perform this
+ * cleanup.
*/
STATIC void
xfs_buf_item_release(
@@ -607,18 +684,19 @@ xfs_buf_item_release(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- bool released;
bool hold = bip->bli_flags & XFS_BLI_HOLD;
bool stale = bip->bli_flags & XFS_BLI_STALE;
-#if defined(DEBUG) || defined(XFS_WARN)
- bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
- bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
bool aborted = test_bit(XFS_LI_ABORTED,
&lip->li_flags);
+ bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
+#if defined(DEBUG) || defined(XFS_WARN)
+ bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
#endif
trace_xfs_buf_item_release(bip);
+ ASSERT(xfs_buf_islocked(bp));
+
/*
* The bli dirty state should match whether the blf has logged segments
* except for ordered buffers, where only the bli should be dirty.
@@ -634,16 +712,56 @@ xfs_buf_item_release(
bp->b_transp = NULL;
bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
+ /* If there are other references, then we have nothing to do. */
+ if (!atomic_dec_and_test(&bip->bli_refcount))
+ goto out_release;
+
+ /*
+ * Stale buffer completion frees the BLI, unlocks and releases the
+ * buffer. Neither the BLI nor the buffer is safe to reference after this
+ * call, so there's nothing more we need to do here.
+ *
+ * If we get here with a stale buffer and references to the BLI remain,
+ * we must not unlock the buffer as the last BLI reference owns lock
+ * context, not us.
+ */
+ if (stale) {
+ xfs_buf_item_finish_stale(bip);
+ xfs_buf_relse(bp);
+ ASSERT(!hold);
+ return;
+ }
+
/*
- * Unref the item and unlock the buffer unless held or stale. Stale
- * buffers remain locked until final unpin unless the bli is freed by
- * the unref call. The latter implies shutdown because buffer
- * invalidation dirties the bli and transaction.
+ * Dirty or clean, aborted items are done and need to be removed from
+ * the AIL and released. This frees the BLI, but leaves the buffer
+ * locked and referenced.
*/
- released = xfs_buf_item_put(bip);
- if (hold || (stale && !released))
+ if (aborted || xlog_is_shutdown(lip->li_log)) {
+ ASSERT(list_empty(&bip->bli_buf->b_li_list));
+ xfs_buf_item_done(bp);
+ goto out_release;
+ }
+
+ /*
+ * Clean, unreferenced BLIs can be immediately freed, leaving the buffer
+ * locked and referenced.
+ *
+ * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback.
+ */
+ if (!dirty)
+ xfs_buf_item_relse(bip);
+ else
+ ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
+
+ /* Not safe to reference the BLI from here */
+out_release:
+ /*
+ * If we get here with a stale buffer, we must not unlock the
+ * buffer as the last BLI reference owns lock context, not us.
+ */
+ if (stale || hold)
return;
- ASSERT(!stale || aborted);
xfs_buf_relse(bp);
}
@@ -729,33 +847,6 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_push = xfs_buf_item_push,
};
-STATIC void
-xfs_buf_item_get_format(
- struct xfs_buf_log_item *bip,
- int count)
-{
- ASSERT(bip->bli_formats == NULL);
- bip->bli_format_count = count;
-
- if (count == 1) {
- bip->bli_formats = &bip->__bli_format;
- return;
- }
-
- bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format),
- GFP_KERNEL | __GFP_NOFAIL);
-}
-
-STATIC void
-xfs_buf_item_free_format(
- struct xfs_buf_log_item *bip)
-{
- if (bip->bli_formats != &bip->__bli_format) {
- kfree(bip->bli_formats);
- bip->bli_formats = NULL;
- }
-}
-
/*
* Allocate a new buf log item to go with the given buffer.
* Set the buffer's b_log_item field to point to the new
@@ -976,34 +1067,6 @@ xfs_buf_item_dirty_format(
return false;
}
-STATIC void
-xfs_buf_item_free(
- struct xfs_buf_log_item *bip)
-{
- xfs_buf_item_free_format(bip);
- kvfree(bip->bli_item.li_lv_shadow);
- kmem_cache_free(xfs_buf_item_cache, bip);
-}
-
-/*
- * xfs_buf_item_relse() is called when the buf log item is no longer needed.
- */
-void
-xfs_buf_item_relse(
- struct xfs_buf *bp)
-{
- struct xfs_buf_log_item *bip = bp->b_log_item;
-
- trace_xfs_buf_item_relse(bp, _RET_IP_);
- ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
-
- if (atomic_read(&bip->bli_refcount))
- return;
- bp->b_log_item = NULL;
- xfs_buf_rele(bp);
- xfs_buf_item_free(bip);
-}
-
void
xfs_buf_item_done(
struct xfs_buf *bp)
@@ -1023,5 +1086,5 @@ xfs_buf_item_done(
xfs_trans_ail_delete(&bp->b_log_item->bli_item,
(bp->b_flags & _XBF_LOGRECOVERY) ? 0 :
SHUTDOWN_CORRUPT_INCORE);
- xfs_buf_item_relse(bp);
+ xfs_buf_item_relse(bp->b_log_item);
}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index e10e324cd245..416890b84f8c 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -49,8 +49,7 @@ struct xfs_buf_log_item {
int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void xfs_buf_item_done(struct xfs_buf *bp);
-void xfs_buf_item_relse(struct xfs_buf *);
-bool xfs_buf_item_put(struct xfs_buf_log_item *);
+void xfs_buf_item_put(struct xfs_buf_log_item *bip);
void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void xfs_buf_inode_iodone(struct xfs_buf *);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index b4e32f0860b7..0bd8022e47b4 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1398,11 +1398,9 @@ xfs_qm_dqflush(
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(!completion_done(&dqp->q_flush));
+ ASSERT(atomic_read(&dqp->q_pincount) == 0);
trace_xfs_dqflush(dqp);
-
- xfs_qm_dqunpin_wait(dqp);
-
fa = xfs_qm_dqflush_check(dqp);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 48254a72071b..0b41b18debf3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1335,9 +1335,10 @@ xfs_falloc_allocate_range(
}
#define XFS_FALLOC_FL_SUPPORTED \
- (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
- FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
- FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
+ (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \
+ FALLOC_FL_UNSHARE_RANGE)
STATIC long
__xfs_file_fallocate(
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 726e29b837e6..bbc2f2973dcc 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -979,7 +979,15 @@ xfs_reclaim_inode(
*/
if (xlog_is_shutdown(ip->i_mount->m_log)) {
xfs_iunpin_wait(ip);
+ /*
+ * Avoid an ABBA deadlock on the inode cluster buffer vs
+ * concurrent xfs_ifree_cluster() trying to mark the inode
+ * stale. We don't need the inode locked to run the flush abort
+ * code, but the flush abort needs to lock the cluster buffer.
+ */
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iflush_shutdown_abort(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
goto reclaim;
}
if (xfs_ipincount(ip))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ee3e0f284287..761a996a857c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1635,7 +1635,7 @@ retry:
iip = ip->i_itemp;
if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
ASSERT(!list_empty(&iip->ili_item.li_bio_list));
- ASSERT(iip->ili_last_fields);
+ ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log));
goto out_iunlock;
}
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c6cb0b6b9e46..285e27ff89e2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -758,11 +758,14 @@ xfs_inode_item_push(
* completed and items removed from the AIL before the next push
* attempt.
*/
+ trace_xfs_inode_push_stale(ip, _RET_IP_);
return XFS_ITEM_PINNED;
}
- if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp))
+ if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) {
+ trace_xfs_inode_push_pinned(ip, _RET_IP_);
return XFS_ITEM_PINNED;
+ }
if (xfs_iflags_test(ip, XFS_IFLUSHING))
return XFS_ITEM_FLUSHING;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index f66d2d430e4f..a80cb6b9969a 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -793,8 +793,10 @@ xlog_cil_ail_insert(
struct xfs_log_item *lip = lv->lv_item;
xfs_lsn_t item_lsn;
- if (aborted)
+ if (aborted) {
+ trace_xlog_ail_insert_abort(lip);
set_bit(XFS_LI_ABORTED, &lip->li_flags);
+ }
if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
lip->li_ops->iop_release(lip);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 08443ceec329..866c71d9fbae 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -320,7 +320,7 @@ xfs_mru_cache_create(
xfs_mru_cache_free_func_t free_func)
{
struct xfs_mru_cache *mru = NULL;
- int err = 0, grp;
+ int grp;
unsigned int grp_time;
if (mrup)
@@ -341,8 +341,8 @@ xfs_mru_cache_create(
mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists),
GFP_KERNEL | __GFP_NOFAIL);
if (!mru->lists) {
- err = -ENOMEM;
- goto exit;
+ kfree(mru);
+ return -ENOMEM;
}
for (grp = 0; grp < mru->grp_count; grp++)
@@ -361,14 +361,7 @@ xfs_mru_cache_create(
mru->free_func = free_func;
mru->data = data;
*mrup = mru;
-
-exit:
- if (err && mru && mru->lists)
- kfree(mru->lists);
- if (err && mru)
- kfree(mru);
-
- return err;
+ return 0;
}
/*
@@ -425,10 +418,6 @@ xfs_mru_cache_insert(
{
int error = -EINVAL;
- ASSERT(mru && mru->lists);
- if (!mru || !mru->lists)
- goto out_free;
-
error = -ENOMEM;
if (radix_tree_preload(GFP_KERNEL))
goto out_free;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 417439b58785..fa135ac26471 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -134,6 +134,7 @@ xfs_qm_dqpurge(
dqp->q_flags |= XFS_DQFLAG_FREEING;
+ xfs_qm_dqunpin_wait(dqp);
xfs_dqflock(dqp);
/*
@@ -465,6 +466,7 @@ xfs_qm_dquot_isolate(
struct xfs_dquot *dqp = container_of(item,
struct xfs_dquot, q_lru);
struct xfs_qm_isolate *isol = arg;
+ enum lru_status ret = LRU_SKIP;
if (!xfs_dqlock_nowait(dqp))
goto out_miss_busy;
@@ -478,6 +480,16 @@ xfs_qm_dquot_isolate(
goto out_miss_unlock;
/*
+ * If the dquot is pinned or dirty, rotate it to the end of the LRU to
+ * give some time for it to be cleaned before we try to isolate it
+ * again.
+ */
+ ret = LRU_ROTATE;
+ if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) {
+ goto out_miss_unlock;
+ }
+
+ /*
* This dquot has acquired a reference in the meantime remove it from
* the freelist and try again.
*/
@@ -492,41 +504,14 @@ xfs_qm_dquot_isolate(
}
/*
- * If the dquot is dirty, flush it. If it's already being flushed, just
- * skip it so there is time for the IO to complete before we try to
- * reclaim it again on the next LRU pass.
+ * The dquot may still be under IO, in which case the flush lock will be
+ * held. If we can't get the flush lock now, just skip over the dquot as
+ * if it was dirty.
*/
if (!xfs_dqflock_nowait(dqp))
goto out_miss_unlock;
- if (XFS_DQ_IS_DIRTY(dqp)) {
- struct xfs_buf *bp = NULL;
- int error;
-
- trace_xfs_dqreclaim_dirty(dqp);
-
- /* we have to drop the LRU lock to flush the dquot */
- spin_unlock(&lru->lock);
-
- error = xfs_dquot_use_attached_buf(dqp, &bp);
- if (!bp || error == -EAGAIN) {
- xfs_dqfunlock(dqp);
- goto out_unlock_dirty;
- }
-
- /*
- * dqflush completes dqflock on error, and the delwri ioend
- * does it on success.
- */
- error = xfs_qm_dqflush(dqp, bp);
- if (error)
- goto out_unlock_dirty;
-
- xfs_buf_delwri_queue(bp, &isol->buffers);
- xfs_buf_relse(bp);
- goto out_unlock_dirty;
- }
-
+ ASSERT(!XFS_DQ_IS_DIRTY(dqp));
xfs_dquot_detach_buf(dqp);
xfs_dqfunlock(dqp);
@@ -548,13 +533,7 @@ out_miss_unlock:
out_miss_busy:
trace_xfs_dqreclaim_busy(dqp);
XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
- return LRU_SKIP;
-
-out_unlock_dirty:
- trace_xfs_dqreclaim_busy(dqp);
- XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
- xfs_dqunlock(dqp);
- return LRU_RETRY;
+ return ret;
}
static unsigned long
@@ -1486,7 +1465,6 @@ xfs_qm_flush_one(
struct xfs_dquot *dqp,
void *data)
{
- struct xfs_mount *mp = dqp->q_mount;
struct list_head *buffer_list = data;
struct xfs_buf *bp = NULL;
int error = 0;
@@ -1497,34 +1475,8 @@ xfs_qm_flush_one(
if (!XFS_DQ_IS_DIRTY(dqp))
goto out_unlock;
- /*
- * The only way the dquot is already flush locked by the time quotacheck
- * gets here is if reclaim flushed it before the dqadjust walk dirtied
- * it for the final time. Quotacheck collects all dquot bufs in the
- * local delwri queue before dquots are dirtied, so reclaim can't have
- * possibly queued it for I/O. The only way out is to push the buffer to
- * cycle the flush lock.
- */
- if (!xfs_dqflock_nowait(dqp)) {
- /* buf is pinned in-core by delwri list */
- error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp);
- if (error)
- goto out_unlock;
-
- if (!(bp->b_flags & _XBF_DELWRI_Q)) {
- error = -EAGAIN;
- xfs_buf_relse(bp);
- goto out_unlock;
- }
- xfs_buf_unlock(bp);
-
- xfs_buf_delwri_pushbuf(bp, buffer_list);
- xfs_buf_rele(bp);
-
- error = -EAGAIN;
- goto out_unlock;
- }
+ xfs_qm_dqunpin_wait(dqp);
+ xfs_dqflock(dqp);
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error)
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6484c596ecea..736eb0924573 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1259,6 +1259,8 @@ xfs_growfs_check_rtgeom(
kfree(nmp);
+ trace_xfs_growfs_check_rtgeom(mp, min_logfsbs);
+
if (min_logfsbs > mp->m_sb.sb_logblocks)
return -EINVAL;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0bc4b5489078..bb0a82635a77 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -2020,14 +2020,13 @@ xfs_remount_rw(
int error;
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp &&
- bdev_read_only(mp->m_logdev_targp->bt_bdev)) {
+ xfs_readonly_buftarg(mp->m_logdev_targp)) {
xfs_warn(mp,
"ro->rw transition prohibited by read-only logdev");
return -EACCES;
}
- if (mp->m_rtdev_targp &&
- bdev_read_only(mp->m_rtdev_targp->bt_bdev)) {
+ if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) {
xfs_warn(mp,
"ro->rw transition prohibited by read-only rtdev");
return -EACCES;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 01d284a1c759..ba45d801df1c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -778,7 +778,6 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_iodone_async);
@@ -1147,6 +1146,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
__field(xfs_ino_t, ino)
__field(int, count)
__field(int, pincount)
+ __field(unsigned long, iflags)
__field(unsigned long, caller_ip)
),
TP_fast_assign(
@@ -1154,13 +1154,15 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
__entry->ino = ip->i_ino;
__entry->count = atomic_read(&VFS_I(ip)->i_count);
__entry->pincount = atomic_read(&ip->i_pincount);
+ __entry->iflags = ip->i_flags;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS",
+ TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->count,
__entry->pincount,
+ __entry->iflags,
(char *)__entry->caller_ip)
)
@@ -1250,6 +1252,8 @@ DEFINE_IREF_EVENT(xfs_irele);
DEFINE_IREF_EVENT(xfs_inode_pin);
DEFINE_IREF_EVENT(xfs_inode_unpin);
DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
+DEFINE_IREF_EVENT(xfs_inode_push_pinned);
+DEFINE_IREF_EVENT(xfs_inode_push_stale);
DECLARE_EVENT_CLASS(xfs_namespace_class,
TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name),
@@ -1654,6 +1658,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin);
+DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort);
+DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort);
DECLARE_EVENT_CLASS(xfs_ail_class,
TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c6657072361a..b4a07af513ba 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -742,8 +742,10 @@ xfs_trans_free_items(
list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
xfs_trans_del_item(lip);
- if (abort)
+ if (abort) {
+ trace_xfs_trans_free_abort(lip);
set_bit(XFS_LI_ABORTED, &lip->li_flags);
+ }
if (lip->li_ops->iop_release)
lip->li_ops->iop_release(lip);
}
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 80add26c0111..01315ed75502 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -727,7 +727,7 @@ xfs_select_zone(
for (;;) {
prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
- if (oz)
+ if (oz || xfs_is_shutdown(mp))
break;
schedule();
}
@@ -777,26 +777,6 @@ xfs_mark_rtg_boundary(
ioend->io_flags |= IOMAP_IOEND_BOUNDARY;
}
-static void
-xfs_submit_zoned_bio(
- struct iomap_ioend *ioend,
- struct xfs_open_zone *oz,
- bool is_seq)
-{
- ioend->io_bio.bi_iter.bi_sector = ioend->io_sector;
- ioend->io_private = oz;
- atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */
-
- if (is_seq) {
- ioend->io_bio.bi_opf &= ~REQ_OP_WRITE;
- ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND;
- } else {
- xfs_mark_rtg_boundary(ioend);
- }
-
- submit_bio(&ioend->io_bio);
-}
-
/*
* Cache the last zone written to for an inode so that it is considered first
* for subsequent writes.
@@ -891,6 +871,26 @@ xfs_zone_cache_create_association(
xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
}
+static void
+xfs_submit_zoned_bio(
+ struct iomap_ioend *ioend,
+ struct xfs_open_zone *oz,
+ bool is_seq)
+{
+ ioend->io_bio.bi_iter.bi_sector = ioend->io_sector;
+ ioend->io_private = oz;
+ atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */
+
+ if (is_seq) {
+ ioend->io_bio.bi_opf &= ~REQ_OP_WRITE;
+ ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND;
+ } else {
+ xfs_mark_rtg_boundary(ioend);
+ }
+
+ submit_bio(&ioend->io_bio);
+}
+
void
xfs_zone_alloc_and_submit(
struct iomap_ioend *ioend,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b085f161ed22..040c0036320f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3608,6 +3608,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
+struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
+ const struct inode *context_inode);
extern int simple_nosetlease(struct file *, int, struct file_lease **, void **);
extern const struct dentry_operations simple_dentry_operations;
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 065c17385e53..f43f075852c0 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -265,21 +265,20 @@ struct netfs_io_request {
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
refcount_t ref;
unsigned long flags;
-#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */
-#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
-#define NETFS_RREQ_FAILED 4 /* The request failed */
-#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */
-#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */
-#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
-#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
+#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */
+#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */
+#define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */
+#define NETFS_RREQ_FAILED 3 /* The request failed */
+#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */
+#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */
+#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */
+#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */
+#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */
+#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */
#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
-#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
-#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */
-#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
* write to cache on read */
const struct netfs_request_ops *netfs_ops;
- void (*cleanup)(struct netfs_io_request *req);
};
/*
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 333d2e38dd2c..73e96ccbe830 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -50,12 +50,14 @@
#define netfs_rreq_traces \
EM(netfs_rreq_trace_assess, "ASSESS ") \
- EM(netfs_rreq_trace_copy, "COPY ") \
EM(netfs_rreq_trace_collect, "COLLECT") \
EM(netfs_rreq_trace_complete, "COMPLET") \
+ EM(netfs_rreq_trace_copy, "COPY ") \
EM(netfs_rreq_trace_dirty, "DIRTY ") \
EM(netfs_rreq_trace_done, "DONE ") \
EM(netfs_rreq_trace_free, "FREE ") \
+ EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \
+ EM(netfs_rreq_trace_recollect, "RECLLCT") \
EM(netfs_rreq_trace_redirty, "REDIRTY") \
EM(netfs_rreq_trace_resubmit, "RESUBMT") \
EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \
@@ -63,13 +65,15 @@
EM(netfs_rreq_trace_unlock, "UNLOCK ") \
EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \
EM(netfs_rreq_trace_unmark, "UNMARK ") \
+ EM(netfs_rreq_trace_unpause, "UNPAUSE") \
EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \
- EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \
- EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \
+ EM(netfs_rreq_trace_wait_pause, "--PAUSED--") \
+ EM(netfs_rreq_trace_wait_quiesce, "WAIT-QUIESCE") \
+ EM(netfs_rreq_trace_waited_ip, "DONE-IP") \
+ EM(netfs_rreq_trace_waited_pause, "--UNPAUSED--") \
+ EM(netfs_rreq_trace_waited_quiesce, "DONE-QUIESCE") \
EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \
EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \
- EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \
- EM(netfs_rreq_trace_unpause, "UNPAUSE") \
E_(netfs_rreq_trace_write_done, "WR-DONE")
#define netfs_sreq_sources \
@@ -82,6 +86,7 @@
E_(NETFS_WRITE_TO_CACHE, "WRIT")
#define netfs_sreq_traces \
+ EM(netfs_sreq_trace_abandoned, "ABNDN") \
EM(netfs_sreq_trace_add_donations, "+DON ") \
EM(netfs_sreq_trace_added, "ADD ") \
EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \
@@ -89,6 +94,7 @@
EM(netfs_sreq_trace_cache_write, "CA-WR") \
EM(netfs_sreq_trace_cancel, "CANCL") \
EM(netfs_sreq_trace_clear, "CLEAR") \
+ EM(netfs_sreq_trace_consumed, "CONSM") \
EM(netfs_sreq_trace_discard, "DSCRD") \
EM(netfs_sreq_trace_donate_to_prev, "DON-P") \
EM(netfs_sreq_trace_donate_to_next, "DON-N") \
@@ -96,7 +102,12 @@
EM(netfs_sreq_trace_fail, "FAIL ") \
EM(netfs_sreq_trace_free, "FREE ") \
EM(netfs_sreq_trace_hit_eof, "EOF ") \
- EM(netfs_sreq_trace_io_progress, "IO ") \
+ EM(netfs_sreq_trace_io_bad, "I-BAD") \
+ EM(netfs_sreq_trace_io_malformed, "I-MLF") \
+ EM(netfs_sreq_trace_io_unknown, "I-UNK") \
+ EM(netfs_sreq_trace_io_progress, "I-OK ") \
+ EM(netfs_sreq_trace_io_req_submitted, "I-RSB") \
+ EM(netfs_sreq_trace_io_retry_needed, "I-RTR") \
EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_need_clear, "N-CLR") \
EM(netfs_sreq_trace_partial_read, "PARTR") \
@@ -142,8 +153,8 @@
#define netfs_sreq_ref_traces \
EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \
- EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \
- EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \
+ EM(netfs_sreq_trace_get_resubmit, "GET RESUBMT") \
+ EM(netfs_sreq_trace_get_submit, "GET SUBMIT ") \
EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \
EM(netfs_sreq_trace_new, "NEW ") \
EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \
@@ -366,7 +377,7 @@ TRACE_EVENT(netfs_sreq,
__entry->slot = sreq->io_iter.folioq_slot;
),
- TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d",
+ TP_printk("R=%08x[%x] %s %s f=%03x s=%llx %zx/%zx s=%u e=%d",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->what, netfs_sreq_traces),
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5111ec040c53..73648d26a622 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1666,11 +1666,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
io_req_flags_t io_file_get_flags(struct file *file)
{
+ struct inode *inode = file_inode(file);
io_req_flags_t res = 0;
BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1);
- if (S_ISREG(file_inode(file)->i_mode))
+ if (S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE))
res |= REQ_F_ISREG;
if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT))
res |= REQ_F_SUPPORT_NOWAIT;
diff --git a/lib/test_objagg.c b/lib/test_objagg.c
index d34df4306b87..222b39fc2629 100644
--- a/lib/test_objagg.c
+++ b/lib/test_objagg.c
@@ -899,8 +899,10 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints,
int err;
stats = objagg_hints_stats_get(objagg_hints);
- if (IS_ERR(stats))
+ if (IS_ERR(stats)) {
+ *errmsg = "objagg_hints_stats_get() failed.";
return PTR_ERR(stats);
+ }
err = __check_expect_stats(stats, expect_stats, errmsg);
objagg_stats_put(stats);
return err;
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 589b26c2d553..9a11a38a6770 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -195,18 +195,11 @@ static struct file *secretmem_file_create(unsigned long flags)
struct file *file;
struct inode *inode;
const char *anon_name = "[secretmem]";
- int err;
- inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
+ inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL);
if (IS_ERR(inode))
return ERR_CAST(inode);
- err = security_inode_init_security_anon(inode, &QSTR(anon_name), NULL);
- if (err) {
- file = ERR_PTR(err);
- goto err_free_inode;
- }
-
file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
O_RDWR, &secretmem_fops);
if (IS_ERR(file))
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 66052d6aaa1d..4d5ace9d245d 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2150,40 +2150,6 @@ static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_le_set_ext_adv_params *rp = data;
- struct hci_cp_le_set_ext_adv_params *cp;
- struct adv_info *adv_instance;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS);
- if (!cp)
- return rp->status;
-
- hci_dev_lock(hdev);
- hdev->adv_addr_type = cp->own_addr_type;
- if (!cp->handle) {
- /* Store in hdev for instance 0 */
- hdev->adv_tx_power = rp->tx_power;
- } else {
- adv_instance = hci_find_adv_instance(hdev, cp->handle);
- if (adv_instance)
- adv_instance->tx_power = rp->tx_power;
- }
- /* Update adv data as tx power is known now */
- hci_update_adv_data(hdev, cp->handle);
-
- hci_dev_unlock(hdev);
-
- return rp->status;
-}
-
static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -4164,8 +4130,6 @@ static const struct hci_cc {
HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS,
hci_cc_le_read_num_adv_sets,
sizeof(struct hci_rp_le_read_num_supported_adv_sets)),
- HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param,
- sizeof(struct hci_rp_le_set_ext_adv_params)),
HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE,
hci_cc_le_set_ext_adv_enable),
HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR,
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 6687f2a4d1eb..77b3691f3423 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1205,9 +1205,126 @@ static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
+static int
+hci_set_ext_adv_params_sync(struct hci_dev *hdev, struct adv_info *adv,
+ const struct hci_cp_le_set_ext_adv_params *cp,
+ struct hci_rp_le_set_ext_adv_params *rp)
+{
+ struct sk_buff *skb;
+
+ skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(*cp),
+ cp, HCI_CMD_TIMEOUT);
+
+ /* If command return a status event, skb will be set to -ENODATA */
+ if (skb == ERR_PTR(-ENODATA))
+ return 0;
+
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld",
+ HCI_OP_LE_SET_EXT_ADV_PARAMS, PTR_ERR(skb));
+ return PTR_ERR(skb);
+ }
+
+ if (skb->len != sizeof(*rp)) {
+ bt_dev_err(hdev, "Invalid response length for 0x%4.4x: %u",
+ HCI_OP_LE_SET_EXT_ADV_PARAMS, skb->len);
+ kfree_skb(skb);
+ return -EIO;
+ }
+
+ memcpy(rp, skb->data, sizeof(*rp));
+ kfree_skb(skb);
+
+ if (!rp->status) {
+ hdev->adv_addr_type = cp->own_addr_type;
+ if (!cp->handle) {
+ /* Store in hdev for instance 0 */
+ hdev->adv_tx_power = rp->tx_power;
+ } else if (adv) {
+ adv->tx_power = rp->tx_power;
+ }
+ }
+
+ return rp->status;
+}
+
+static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
+ u8 len;
+ struct adv_info *adv = NULL;
+ int err;
+
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv || !adv->adv_data_changed)
+ return 0;
+ }
+
+ len = eir_create_adv_data(hdev, instance, pdu->data,
+ HCI_MAX_EXT_AD_LENGTH);
+
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+
+ err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
+ struct_size(pdu, data, len), pdu,
+ HCI_CMD_TIMEOUT);
+ if (err)
+ return err;
+
+ /* Update data if the command succeed */
+ if (adv) {
+ adv->adv_data_changed = false;
+ } else {
+ memcpy(hdev->adv_data, pdu->data, len);
+ hdev->adv_data_len = len;
+ }
+
+ return 0;
+}
+
+static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ struct hci_cp_le_set_adv_data cp;
+ u8 len;
+
+ memset(&cp, 0, sizeof(cp));
+
+ len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data));
+
+ /* There's nothing to do if the data hasn't changed */
+ if (hdev->adv_data_len == len &&
+ memcmp(cp.data, hdev->adv_data, len) == 0)
+ return 0;
+
+ memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
+ hdev->adv_data_len = len;
+
+ cp.length = len;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
+int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ return 0;
+
+ if (ext_adv_capable(hdev))
+ return hci_set_ext_adv_data_sync(hdev, instance);
+
+ return hci_set_adv_data_sync(hdev, instance);
+}
+
int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_ext_adv_params cp;
+ struct hci_rp_le_set_ext_adv_params rp;
bool connectable;
u32 flags;
bdaddr_t random_addr;
@@ -1316,8 +1433,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
cp.secondary_phy = HCI_ADV_PHY_1M;
}
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ err = hci_set_ext_adv_params_sync(hdev, adv, &cp, &rp);
+ if (err)
+ return err;
+
+ /* Update adv data as tx power is known now */
+ err = hci_set_ext_adv_data_sync(hdev, cp.handle);
if (err)
return err;
@@ -1822,79 +1943,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason)
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
-static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
- HCI_MAX_EXT_AD_LENGTH);
- u8 len;
- struct adv_info *adv = NULL;
- int err;
-
- if (instance) {
- adv = hci_find_adv_instance(hdev, instance);
- if (!adv || !adv->adv_data_changed)
- return 0;
- }
-
- len = eir_create_adv_data(hdev, instance, pdu->data,
- HCI_MAX_EXT_AD_LENGTH);
-
- pdu->length = len;
- pdu->handle = adv ? adv->handle : instance;
- pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
-
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
- struct_size(pdu, data, len), pdu,
- HCI_CMD_TIMEOUT);
- if (err)
- return err;
-
- /* Update data if the command succeed */
- if (adv) {
- adv->adv_data_changed = false;
- } else {
- memcpy(hdev->adv_data, pdu->data, len);
- hdev->adv_data_len = len;
- }
-
- return 0;
-}
-
-static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- struct hci_cp_le_set_adv_data cp;
- u8 len;
-
- memset(&cp, 0, sizeof(cp));
-
- len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data));
-
- /* There's nothing to do if the data hasn't changed */
- if (hdev->adv_data_len == len &&
- memcmp(cp.data, hdev->adv_data, len) == 0)
- return 0;
-
- memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
- hdev->adv_data_len = len;
-
- cp.length = len;
-
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
-}
-
-int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- return 0;
-
- if (ext_adv_capable(hdev))
- return hci_set_ext_adv_data_sync(hdev, instance);
-
- return hci_set_adv_data_sync(hdev, instance);
-}
-
int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance,
bool force)
{
@@ -1970,13 +2018,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk)
static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
{
struct adv_info *adv, *n;
- int err = 0;
if (ext_adv_capable(hdev))
/* Remove all existing sets */
- err = hci_clear_adv_sets_sync(hdev, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_clear_adv_sets_sync(hdev, sk);
/* This is safe as long as there is no command send while the lock is
* held.
@@ -2004,13 +2049,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance,
struct sock *sk)
{
- int err = 0;
+ int err;
/* If we use extended advertising, instance has to be removed first. */
if (ext_adv_capable(hdev))
- err = hci_remove_ext_adv_instance_sync(hdev, instance, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_remove_ext_adv_instance_sync(hdev, instance, sk);
/* This is safe as long as there is no command send while the lock is
* held.
@@ -2109,16 +2152,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type)
int hci_disable_advertising_sync(struct hci_dev *hdev)
{
u8 enable = 0x00;
- int err = 0;
/* If controller is not advertising we are done. */
if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
return 0;
if (ext_adv_capable(hdev))
- err = hci_disable_ext_adv_instance_sync(hdev, 0x00);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_disable_ext_adv_instance_sync(hdev, 0x00);
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE,
sizeof(enable), &enable, HCI_CMD_TIMEOUT);
@@ -2481,6 +2521,10 @@ static int hci_pause_advertising_sync(struct hci_dev *hdev)
int err;
int old_state;
+ /* If controller is not advertising we are done. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
+ return 0;
+
/* If already been paused there is nothing to do. */
if (hdev->advertising_paused)
return 0;
@@ -6277,6 +6321,7 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev,
struct hci_conn *conn)
{
struct hci_cp_le_set_ext_adv_params cp;
+ struct hci_rp_le_set_ext_adv_params rp;
int err;
bdaddr_t random_addr;
u8 own_addr_type;
@@ -6318,8 +6363,12 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev,
if (err)
return err;
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ err = hci_set_ext_adv_params_sync(hdev, NULL, &cp, &rp);
+ if (err)
+ return err;
+
+ /* Update adv data as tx power is known now */
+ err = hci_set_ext_adv_data_sync(hdev, cp.handle);
if (err)
return err;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d540f7b4f75f..1485b455ade4 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1080,7 +1080,8 @@ static int mesh_send_done_sync(struct hci_dev *hdev, void *data)
struct mgmt_mesh_tx *mesh_tx;
hci_dev_clear_flag(hdev, HCI_MESH_SENDING);
- hci_disable_advertising_sync(hdev);
+ if (list_empty(&hdev->adv_instances))
+ hci_disable_advertising_sync(hdev);
mesh_tx = mgmt_mesh_next(hdev, NULL);
if (mesh_tx)
@@ -2153,6 +2154,9 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data)
else
hci_dev_clear_flag(hdev, HCI_MESH);
+ hdev->le_scan_interval = __le16_to_cpu(cp->period);
+ hdev->le_scan_window = __le16_to_cpu(cp->window);
+
len -= sizeof(*cp);
/* If filters don't fit, forward all adv pkts */
@@ -2167,6 +2171,7 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
{
struct mgmt_cp_set_mesh *cp = data;
struct mgmt_pending_cmd *cmd;
+ __u16 period, window;
int err = 0;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -2180,6 +2185,23 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
MGMT_STATUS_INVALID_PARAMS);
+ /* Keep allowed ranges in sync with set_scan_params() */
+ period = __le16_to_cpu(cp->period);
+
+ if (period < 0x0004 || period > 0x4000)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ window = __le16_to_cpu(cp->window);
+
+ if (window < 0x0004 || window > 0x4000)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ if (window > period)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
hci_dev_lock(hdev);
cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len);
@@ -6432,6 +6454,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS,
MGMT_STATUS_NOT_SUPPORTED);
+ /* Keep allowed ranges in sync with set_mesh() */
interval = __le16_to_cpu(cp->interval);
if (interval < 0x0004 || interval > 0x4000)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 30a5e9460d00..5a49eb99e5c4 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -319,8 +319,8 @@ static int ip_rcv_finish_core(struct net *net,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
- int err, drop_reason;
struct rtable *rt;
+ int drop_reason;
if (ip_can_use_hint(skb, iph, hint)) {
drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr,
@@ -345,9 +345,10 @@ static int ip_rcv_finish_core(struct net *net,
break;
case IPPROTO_UDP:
if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
- err = udp_v4_early_demux(skb);
- if (unlikely(err))
+ drop_reason = udp_v4_early_demux(skb);
+ if (unlikely(drop_reason))
goto drop_error;
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 2dd6bd3a3011..b72bf8a08d48 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -497,22 +497,15 @@ void rose_rt_device_down(struct net_device *dev)
t = rose_node;
rose_node = rose_node->next;
- for (i = 0; i < t->count; i++) {
+ for (i = t->count - 1; i >= 0; i--) {
if (t->neighbour[i] != s)
continue;
t->count--;
- switch (i) {
- case 0:
- t->neighbour[0] = t->neighbour[1];
- fallthrough;
- case 1:
- t->neighbour[1] = t->neighbour[2];
- break;
- case 2:
- break;
- }
+ memmove(&t->neighbour[i], &t->neighbour[i + 1],
+ sizeof(t->neighbour[0]) *
+ (t->count - i));
}
if (t->count <= 0)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c5e3673aadbe..d8a33486c511 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -780,15 +780,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
- bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
bool notify;
int drops;
- if (n == 0 && len == 0)
- return;
drops = max_t(int, n, 0);
rcu_read_lock();
while ((parentid = sch->parent)) {
@@ -797,17 +794,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
if (sch->flags & TCQ_F_NOPARENT)
break;
- /* Notify parent qdisc only if child qdisc becomes empty.
- *
- * If child was empty even before update then backlog
- * counter is screwed and we skip notification because
- * parent class is already passive.
- *
- * If the original child was offloaded then it is allowed
- * to be seem as empty, so the parent is notified anyway.
- */
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
- !qdisc_is_offloaded);
+ /* Notify parent qdisc only if child qdisc becomes empty. */
+ notify = !sch->q.qlen;
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
@@ -816,6 +804,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
}
cops = sch->ops->cl_ops;
if (notify && cops->qlen_notify) {
+ /* Note that qlen_notify must be idempotent as it may get called
+ * multiple times.
+ */
cl = cops->find(sch, parentid);
cops->qlen_notify(sch, cl);
}
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 0fa244f16876..7b943fbafcc3 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1724,7 +1724,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[0], seq, p, len);
/* RFC 2203 5.3.3.1 - compute the checksum of each sequence number in the cache */
while (unlikely(maj_stat == GSS_S_BAD_SIG && i < task->tk_rqstp->rq_seqno_count))
- maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i], seq, p, len);
+ maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i++], seq, p, len);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat)
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index b370070194fa..7eccd6708d66 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
u16 proto,
struct vmci_handle handle)
{
+ memset(pkt, 0, sizeof(*pkt));
+
/* We register the stream control handler as an any cid handle so we
* must always send from a source address of VMADDR_CID_ANY
*/
@@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
pkt->type = type;
pkt->src_port = src->svm_port;
pkt->dst_port = dst->svm_port;
- memset(&pkt->proto, 0, sizeof(pkt->proto));
- memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
switch (pkt->type) {
case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 9984413be9f0..68f8e479ac36 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -461,10 +461,15 @@ TEST_F(coredump, socket_detect_userspace_client)
_exit(EXIT_FAILURE);
}
+ ret = read(fd_coredump, &c, 1);
+
close(fd_coredump);
close(fd_server);
close(fd_peer_pidfd);
close(fd_core_file);
+
+ if (ret < 1)
+ _exit(EXIT_FAILURE);
_exit(EXIT_SUCCESS);
}
self->pid_coredump_server = pid_coredump_server;
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 1a8e85afe9aa..1926ef6b40ab 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -54,6 +54,8 @@ static __attribute__((constructor)) void setup_sizes(void)
mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
&mfd);
+ assert(mfd_buffer != MAP_FAILED);
+ assert(mfd > 0);
}
FIXTURE(iommufd)
@@ -1746,13 +1748,15 @@ TEST_F(iommufd_mock_domain, all_aligns)
unsigned int end;
uint8_t *buf;
int prot = PROT_READ | PROT_WRITE;
- int mfd;
+ int mfd = -1;
if (variant->file)
buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
else
buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
check_refs(buf, buf_size, 0);
/*
@@ -1798,13 +1802,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy)
unsigned int end;
uint8_t *buf;
int prot = PROT_READ | PROT_WRITE;
- int mfd;
+ int mfd = -1;
if (variant->file)
buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
else
buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
check_refs(buf, buf_size, 0);
/*
@@ -2008,6 +2014,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
FIXTURE_SETUP(iommufd_dirty_tracking)
{
+ size_t mmap_buffer_size;
unsigned long size;
int mmap_flags;
void *vrc;
@@ -2022,22 +2029,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
self->fd = open("/dev/iommu", O_RDWR);
ASSERT_NE(-1, self->fd);
- rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size);
- if (rc || !self->buffer) {
- SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
- variant->buffer_size, rc);
- }
-
mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED;
+ mmap_buffer_size = variant->buffer_size;
if (variant->hugepages) {
/*
* MAP_POPULATE will cause the kernel to fail mmap if THPs are
* not available.
*/
mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+
+ /*
+ * Allocation must be aligned to the HUGEPAGE_SIZE, because the
+ * following mmap() will automatically align the length to be a
+ * multiple of the underlying huge page size. Failing to do the
+ * same at this allocation will result in a memory overwrite by
+ * the mmap().
+ */
+ if (mmap_buffer_size < HUGEPAGE_SIZE)
+ mmap_buffer_size = HUGEPAGE_SIZE;
+ }
+
+ rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size);
+ if (rc || !self->buffer) {
+ SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
+ mmap_buffer_size, rc);
}
assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
- vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE,
+ vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE,
mmap_flags, -1, 0);
assert(vrc == self->buffer);
@@ -2066,8 +2084,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
{
- munmap(self->buffer, variant->buffer_size);
- munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE));
+ free(self->buffer);
+ free(self->bitmap);
teardown_iommufd(self->fd, _metadata);
}
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 72f6636e5d90..6e967b58acfd 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -60,13 +60,18 @@ static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p)
{
int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0;
int mfd = memfd_create("buffer", mfd_flags);
+ void *buf = MAP_FAILED;
if (mfd <= 0)
return MAP_FAILED;
if (ftruncate(mfd, length))
- return MAP_FAILED;
+ goto out;
*mfd_p = mfd;
- return mmap(0, length, prot, flags, mfd, 0);
+ buf = mmap(0, length, prot, flags, mfd, 0);
+out:
+ if (buf == MAP_FAILED)
+ close(mfd);
+ return buf;
}
/*