summaryrefslogtreecommitdiff
path: root/drivers/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio')
-rw-r--r--drivers/vfio/group.c7
-rw-r--r--drivers/vfio/iommufd.c4
-rw-r--r--drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c93
-rw-r--r--drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h14
-rw-r--r--drivers/vfio/pci/mlx5/cmd.c4
-rw-r--r--drivers/vfio/pci/nvgrace-gpu/main.c95
-rw-r--r--drivers/vfio/pci/pds/vfio_dev.c1
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c3
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c24
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c2
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c1
-rw-r--r--drivers/vfio/platform/vfio_platform_common.c10
-rw-r--r--drivers/vfio/vfio_iommu_type1.c9
-rw-r--r--drivers/vfio/vfio_main.c3
14 files changed, 201 insertions, 69 deletions
diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
index 95b336de8a17..5f2b2c950bbc 100644
--- a/drivers/vfio/group.c
+++ b/drivers/vfio/group.c
@@ -194,11 +194,10 @@ static int vfio_df_group_open(struct vfio_device_file *df)
* implies they expected translation to exist
*/
if (!capable(CAP_SYS_RAWIO) ||
- vfio_iommufd_device_has_compat_ioas(device, df->iommufd))
+ vfio_iommufd_device_has_compat_ioas(device, df->iommufd)) {
ret = -EPERM;
- else
- ret = 0;
- goto out_put_kvm;
+ goto out_put_kvm;
+ }
}
ret = vfio_df_open(df);
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
index 82eba6966fa5..02852899c2ae 100644
--- a/drivers/vfio/iommufd.c
+++ b/drivers/vfio/iommufd.c
@@ -25,6 +25,10 @@ int vfio_df_iommufd_bind(struct vfio_device_file *df)
lockdep_assert_held(&vdev->dev_set->lock);
+ /* Returns 0 to permit device opening under noiommu mode */
+ if (vfio_device_is_noiommu(vdev))
+ return 0;
+
return vdev->ops->bind_iommufd(vdev, ictx, &df->devid);
}
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 0d632ba5d2a3..dda8cb3262e0 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -350,6 +350,32 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
}
+static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev)
+{
+ switch (vf_data->acc_magic) {
+ case ACC_DEV_MAGIC_V2:
+ if (vf_data->major_ver != ACC_DRV_MAJOR_VER) {
+ dev_info(dev, "migration driver version<%u.%u> not match!\n",
+ vf_data->major_ver, vf_data->minor_ver);
+ return -EINVAL;
+ }
+ break;
+ case ACC_DEV_MAGIC_V1:
+ /* Correct dma address */
+ vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
+ vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
+ vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
+ vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
struct hisi_acc_vf_migration_file *migf)
{
@@ -363,7 +389,8 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
return 0;
- if (vf_data->acc_magic != ACC_DEV_MAGIC) {
+ ret = vf_qm_version_check(vf_data, dev);
+ if (ret) {
dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
return -EINVAL;
}
@@ -399,13 +426,6 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return -EINVAL;
}
- ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
- if (ret) {
- dev_err(dev, "failed to write QM_VF_STATE\n");
- return ret;
- }
-
- hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
hisi_acc_vdev->match_done = true;
return 0;
}
@@ -418,7 +438,9 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
int vf_id = hisi_acc_vdev->vf_id;
int ret;
- vf_data->acc_magic = ACC_DEV_MAGIC;
+ vf_data->acc_magic = ACC_DEV_MAGIC_V2;
+ vf_data->major_ver = ACC_DRV_MAJOR_VER;
+ vf_data->minor_ver = ACC_DRV_MINOR_VER;
/* Save device id */
vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
@@ -441,6 +463,19 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return 0;
}
+static void vf_qm_xeqc_save(struct hisi_qm *qm,
+ struct hisi_acc_vf_migration_file *migf)
+{
+ struct acc_vf_data *vf_data = &migf->vf_data;
+ u16 eq_head, aeq_head;
+
+ eq_head = vf_data->qm_eqc_dw[0] & 0xFFFF;
+ qm_db(qm, 0, QM_DOORBELL_CMD_EQ, eq_head, 0);
+
+ aeq_head = vf_data->qm_aeqc_dw[0] & 0xFFFF;
+ qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, aeq_head, 0);
+}
+
static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
struct hisi_acc_vf_migration_file *migf)
{
@@ -456,6 +491,20 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
if (migf->total_length < sizeof(struct acc_vf_data))
return -EINVAL;
+ if (!vf_data->eqe_dma || !vf_data->aeqe_dma ||
+ !vf_data->sqc_dma || !vf_data->cqc_dma) {
+ dev_info(dev, "resume dma addr is NULL!\n");
+ hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
+ return 0;
+ }
+
+ ret = qm_write_regs(qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_VF_STATE\n");
+ return -EINVAL;
+ }
+ hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+
qm->eqe_dma = vf_data->eqe_dma;
qm->aeqe_dma = vf_data->aeqe_dma;
qm->sqc_dma = vf_data->sqc_dma;
@@ -505,23 +554,17 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
vf_data->vf_qm_state = QM_READY;
hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
- ret = vf_qm_cache_wb(vf_qm);
- if (ret) {
- dev_err(dev, "failed to writeback QM Cache!\n");
- return ret;
- }
-
ret = qm_get_regs(vf_qm, vf_data);
if (ret)
return -EINVAL;
/* Every reg is 32 bit, the dma address is 64 bit. */
- vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
+ vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
- vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
- vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
+ vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
+ vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
- vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
+ vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
/* Through SQC_BT/CQC_BT to get sqc and cqc address */
ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
@@ -537,6 +580,9 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
}
migf->total_length = sizeof(struct acc_vf_data);
+ /* Save eqc and aeqc interrupt information */
+ vf_qm_xeqc_save(vf_qm, migf);
+
return 0;
}
@@ -933,6 +979,13 @@ static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev
dev_err(dev, "failed to check QM INT state!\n");
return ret;
}
+
+ ret = vf_qm_cache_wb(vf_qm);
+ if (ret) {
+ dev_err(dev, "failed to writeback QM cache!\n");
+ return ret;
+ }
+
return 0;
}
@@ -1306,6 +1359,7 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
struct hisi_acc_vf_core_device, core_device.vdev);
struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+ hisi_acc_vf_disable_fds(hisi_acc_vdev);
iounmap(vf_qm->io_base);
vfio_pci_core_close_device(core_vdev);
}
@@ -1326,6 +1380,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1;
hisi_acc_vdev->pf_qm = pf_qm;
hisi_acc_vdev->vf_dev = pdev;
+ hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
mutex_init(&hisi_acc_vdev->state_mutex);
core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY;
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 5bab46602fad..465284168906 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -38,6 +38,9 @@
#define QM_REG_ADDR_OFFSET 0x0004
#define QM_XQC_ADDR_OFFSET 32U
+#define QM_XQC_ADDR_LOW 0x1
+#define QM_XQC_ADDR_HIGH 0x2
+
#define QM_VF_AEQ_INT_MASK 0x0004
#define QM_VF_EQ_INT_MASK 0x000c
#define QM_IFC_INT_SOURCE_V 0x0020
@@ -49,10 +52,15 @@
#define QM_EQC_DW0 0X8000
#define QM_AEQC_DW0 0X8020
+#define ACC_DRV_MAJOR_VER 1
+#define ACC_DRV_MINOR_VER 0
+
+#define ACC_DEV_MAGIC_V1 0XCDCDCDCDFEEDAACC
+#define ACC_DEV_MAGIC_V2 0xAACCFEEDDECADEDE
+
struct acc_vf_data {
#define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
/* QM match information */
-#define ACC_DEV_MAGIC 0XCDCDCDCDFEEDAACC
u64 acc_magic;
u32 qp_num;
u32 dev_id;
@@ -60,7 +68,9 @@ struct acc_vf_data {
u32 qp_base;
u32 vf_qm_state;
/* QM reserved match information */
- u32 qm_rsv_state[3];
+ u16 major_ver;
+ u16 minor_ver;
+ u32 qm_rsv_state[2];
/* QM RW regs */
u32 aeq_int_mask;
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index eb7387ee6ebd..e7d2251db626 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -1538,8 +1538,8 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev,
log_max_msg_size = MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_msg_size);
max_msg_size = (1ULL << log_max_msg_size);
/* The RQ must hold at least 4 WQEs/messages for successful QP creation */
- if (rq_size < 4 * max_msg_size)
- rq_size = 4 * max_msg_size;
+ if (rq_size < 4ULL * max_msg_size)
+ rq_size = 4ULL * max_msg_size;
memset(tracker, 0, sizeof(*tracker));
tracker->uar = mlx5_get_uars_page(mdev);
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index a7fd018aa548..9e1c57baab64 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -17,12 +17,14 @@
#define RESMEM_REGION_INDEX VFIO_PCI_BAR2_REGION_INDEX
#define USEMEM_REGION_INDEX VFIO_PCI_BAR4_REGION_INDEX
-/* Memory size expected as non cached and reserved by the VM driver */
-#define RESMEM_SIZE SZ_1G
-
/* A hardwired and constant ABI value between the GPU FW and VFIO driver. */
#define MEMBLK_SIZE SZ_512M
+#define DVSEC_BITMAP_OFFSET 0xA
+#define MIG_SUPPORTED_WITH_CACHED_RESMEM BIT(0)
+
+#define GPU_CAP_DVSEC_REGISTER 3
+
/*
* The state of the two device memory region - resmem and usemem - is
* saved as struct mem_region.
@@ -46,6 +48,7 @@ struct nvgrace_gpu_pci_core_device {
struct mem_region resmem;
/* Lock to control device memory kernel mapping */
struct mutex remap_lock;
+ bool has_mig_hw_bug;
};
static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
@@ -66,7 +69,7 @@ nvgrace_gpu_memregion(int index,
if (index == USEMEM_REGION_INDEX)
return &nvdev->usemem;
- if (index == RESMEM_REGION_INDEX)
+ if (nvdev->resmem.memlength && index == RESMEM_REGION_INDEX)
return &nvdev->resmem;
return NULL;
@@ -751,40 +754,67 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
u64 memphys, u64 memlength)
{
int ret = 0;
+ u64 resmem_size = 0;
/*
- * The VM GPU device driver needs a non-cacheable region to support
- * the MIG feature. Since the device memory is mapped as NORMAL cached,
- * carve out a region from the end with a different NORMAL_NC
- * property (called as reserved memory and represented as resmem). This
- * region then is exposed as a 64b BAR (region 2 and 3) to the VM, while
- * exposing the rest (termed as usable memory and represented using usemem)
- * as cacheable 64b BAR (region 4 and 5).
+ * On Grace Hopper systems, the VM GPU device driver needs a non-cacheable
+ * region to support the MIG feature owing to a hardware bug. Since the
+ * device memory is mapped as NORMAL cached, carve out a region from the end
+ * with a different NORMAL_NC property (called as reserved memory and
+ * represented as resmem). This region then is exposed as a 64b BAR
+ * (region 2 and 3) to the VM, while exposing the rest (termed as usable
+ * memory and represented using usemem) as cacheable 64b BAR (region 4 and 5).
*
* devmem (memlength)
* |-------------------------------------------------|
* | |
* usemem.memphys resmem.memphys
+ *
+ * This hardware bug is fixed on the Grace Blackwell platforms and the
+ * presence of the bug can be determined through nvdev->has_mig_hw_bug.
+ * Thus on systems with the hardware fix, there is no need to partition
+ * the GPU device memory and the entire memory is usable and mapped as
+ * NORMAL cached (i.e. resmem size is 0).
*/
+ if (nvdev->has_mig_hw_bug)
+ resmem_size = SZ_1G;
+
nvdev->usemem.memphys = memphys;
/*
* The device memory exposed to the VM is added to the kernel by the
- * VM driver module in chunks of memory block size. Only the usable
- * memory (usemem) is added to the kernel for usage by the VM
- * workloads. Make the usable memory size memblock aligned.
+ * VM driver module in chunks of memory block size. Note that only the
+ * usable memory (usemem) is added to the kernel for usage by the VM
+ * workloads.
*/
- if (check_sub_overflow(memlength, RESMEM_SIZE,
+ if (check_sub_overflow(memlength, resmem_size,
&nvdev->usemem.memlength)) {
ret = -EOVERFLOW;
goto done;
}
/*
- * The USEMEM part of the device memory has to be MEMBLK_SIZE
- * aligned. This is a hardwired ABI value between the GPU FW and
- * VFIO driver. The VM device driver is also aware of it and make
- * use of the value for its calculation to determine USEMEM size.
+ * The usemem region is exposed as a 64B Bar composed of region 4 and 5.
+ * Calculate and save the BAR size for the region.
+ */
+ nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength);
+
+ /*
+ * If the hardware has the fix for MIG, there is no requirement
+ * for splitting the device memory to create RESMEM. The entire
+ * device memory is usable and will be USEMEM. Return here for
+ * such case.
+ */
+ if (!nvdev->has_mig_hw_bug)
+ goto done;
+
+ /*
+ * When the device memory is split to workaround the MIG bug on
+ * Grace Hopper, the USEMEM part of the device memory has to be
+ * MEMBLK_SIZE aligned. This is a hardwired ABI value between the
+ * GPU FW and VFIO driver. The VM device driver is also aware of it
+ * and make use of the value for its calculation to determine USEMEM
+ * size. Note that the device memory may not be 512M aligned.
*/
nvdev->usemem.memlength = round_down(nvdev->usemem.memlength,
MEMBLK_SIZE);
@@ -803,15 +833,34 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
}
/*
- * The memory regions are exposed as BARs. Calculate and save
- * the BAR size for them.
+ * The resmem region is exposed as a 64b BAR composed of region 2 and 3
+ * for Grace Hopper. Calculate and save the BAR size for the region.
*/
- nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength);
nvdev->resmem.bar_size = roundup_pow_of_two(nvdev->resmem.memlength);
done:
return ret;
}
+static bool nvgrace_gpu_has_mig_hw_bug(struct pci_dev *pdev)
+{
+ int pcie_dvsec;
+ u16 dvsec_ctrl16;
+
+ pcie_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_NVIDIA,
+ GPU_CAP_DVSEC_REGISTER);
+
+ if (pcie_dvsec) {
+ pci_read_config_word(pdev,
+ pcie_dvsec + DVSEC_BITMAP_OFFSET,
+ &dvsec_ctrl16);
+
+ if (dvsec_ctrl16 & MIG_SUPPORTED_WITH_CACHED_RESMEM)
+ return false;
+ }
+
+ return true;
+}
+
static int nvgrace_gpu_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
@@ -832,6 +881,8 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
dev_set_drvdata(&pdev->dev, &nvdev->core_device);
if (ops == &nvgrace_gpu_pci_ops) {
+ nvdev->has_mig_hw_bug = nvgrace_gpu_has_mig_hw_bug(pdev);
+
/*
* Device memory properties are identified in the host ACPI
* table. Set the nvgrace_gpu_pci_core_device structure.
diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c
index 76a80ae7087b..f6e0253a8a14 100644
--- a/drivers/vfio/pci/pds/vfio_dev.c
+++ b/drivers/vfio/pci/pds/vfio_dev.c
@@ -204,6 +204,7 @@ static const struct vfio_device_ops pds_vfio_ops = {
.bind_iommufd = vfio_iommufd_physical_bind,
.unbind_iommufd = vfio_iommufd_physical_unbind,
.attach_ioas = vfio_iommufd_physical_attach_ioas,
+ .detach_ioas = vfio_iommufd_physical_detach_ioas,
};
const struct vfio_device_ops *pds_vfio_ops_info(void)
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index ea2745c1ac5e..8ea38e7421df 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1813,7 +1813,8 @@ int vfio_config_init(struct vfio_pci_core_device *vdev)
cpu_to_le16(PCI_COMMAND_MEMORY);
}
- if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx)
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx ||
+ vdev->pdev->irq == IRQ_NOTCONNECTED)
vconfig[PCI_INTERRUPT_PIN] = 0;
ret = vfio_cap_init(vdev);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 1a4ed5a357d3..595503fa9ca8 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -727,15 +727,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);
static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type)
{
if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
- u8 pin;
-
- if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
- vdev->nointx || vdev->pdev->is_virtfn)
- return 0;
-
- pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
-
- return pin ? 1 : 0;
+ return vdev->vconfig[PCI_INTERRUPT_PIN] ? 1 : 0;
} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
u8 pos;
u16 flags;
@@ -1658,14 +1650,14 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
{
struct vm_area_struct *vma = vmf->vma;
struct vfio_pci_core_device *vdev = vma->vm_private_data;
- unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff;
+ unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
+ unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long pfn = vma_to_pfn(vma) + pgoff;
vm_fault_t ret = VM_FAULT_SIGBUS;
- pfn = vma_to_pfn(vma) + pgoff;
-
- if (order && (pfn & ((1 << order) - 1) ||
- vmf->address & ((PAGE_SIZE << order) - 1) ||
- vmf->address + (PAGE_SIZE << order) > vma->vm_end)) {
+ if (order && (addr < vma->vm_start ||
+ addr + (PAGE_SIZE << order) > vma->vm_end ||
+ pfn & ((1 << order) - 1))) {
ret = VM_FAULT_FALLBACK;
goto out;
}
@@ -2161,7 +2153,7 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
return -EBUSY;
}
- if (pci_is_root_bus(pdev->bus)) {
+ if (pci_is_root_bus(pdev->bus) || pdev->is_virtfn) {
ret = vfio_assign_device_set(&vdev->vdev, vdev);
} else if (!pci_probe_reset_slot(pdev->slot)) {
ret = vfio_assign_device_set(&vdev->vdev, pdev->slot);
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 8382c5834335..565966351dfa 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -259,7 +259,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
if (!is_irq_none(vdev))
return -EINVAL;
- if (!pdev->irq)
+ if (!pdev->irq || pdev->irq == IRQ_NOTCONNECTED)
return -ENODEV;
name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 66b72c289284..a0595c745732 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -16,6 +16,7 @@
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include "vfio_pci_priv.h"
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index e53757d1d095..3bf1043cd795 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -388,6 +388,11 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg,
{
unsigned int done = 0;
+ if (off >= reg->size)
+ return -EINVAL;
+
+ count = min_t(size_t, count, reg->size - off);
+
if (!reg->ioaddr) {
reg->ioaddr =
ioremap(reg->addr, reg->size);
@@ -467,6 +472,11 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg,
{
unsigned int done = 0;
+ if (off >= reg->size)
+ return -EINVAL;
+
+ count = min_t(size_t, count, reg->size - off);
+
if (!reg->ioaddr) {
reg->ioaddr =
ioremap(reg->addr, reg->size);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index bf391b40e576..124997ce00d6 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -294,7 +294,7 @@ static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize)
struct rb_node *p;
for (p = rb_prev(n); p; p = rb_prev(p)) {
- struct vfio_dma *dma = rb_entry(n,
+ struct vfio_dma *dma = rb_entry(p,
struct vfio_dma, node);
vfio_dma_bitmap_free(dma);
@@ -619,6 +619,13 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
while (npage) {
if (!batch->size) {
+ /*
+ * Large mappings may take a while to repeatedly refill
+ * the batch, so conditionally relinquish the CPU when
+ * needed to avoid stalls.
+ */
+ cond_resched();
+
/* Empty batch, so refill it. */
long req_pages = min_t(long, npage, batch->capacity);
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index a5a62d9d963f..ae78822f2d71 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -583,7 +583,8 @@ void vfio_df_close(struct vfio_device_file *df)
lockdep_assert_held(&device->dev_set->lock);
- vfio_assert_device_open(device);
+ if (!vfio_assert_device_open(device))
+ return;
if (device->open_count == 1)
vfio_df_device_last_close(df);
device->open_count--;