diff options
author | Chuanxiao Dong <chuanxiao.dong@intel.com> | 2017-06-26 10:20:49 +0300 |
---|---|---|
committer | Zhenyu Wang <zhenyuw@linux.intel.com> | 2017-06-26 11:31:49 +0300 |
commit | 62d02fd1f807bf5a259a242c483c9fb98a242630 (patch) | |
tree | a1094f280528362c80c75c55af904d86db330bfd /drivers/gpu/drm | |
parent | e274086e473c0cbea18051ae0a78a05f8d658f47 (diff) | |
download | linux-62d02fd1f807bf5a259a242c483c9fb98a242630.tar.xz |
drm/i915/gvt: Fix possible recursive locking issue
vfio_unpin_pages will hold a read semaphore however it is already hold
in the same thread by vfio ioctl. It will cause below warning:
[ 5102.127454] ============================================
[ 5102.133379] WARNING: possible recursive locking detected
[ 5102.139304] 4.12.0-rc4+ #3 Not tainted
[ 5102.143483] --------------------------------------------
[ 5102.149407] qemu-system-x86/1620 is trying to acquire lock:
[ 5102.155624] (&container->group_lock){++++++}, at: [<ffffffff817768c6>] vfio_unpin_pages+0x96/0xf0
[ 5102.165626]
but task is already holding lock:
[ 5102.172134] (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280
[ 5102.182522]
other info that might help us debug this:
[ 5102.189806] Possible unsafe locking scenario:
[ 5102.196411] CPU0
[ 5102.199136] ----
[ 5102.201861] lock(&container->group_lock);
[ 5102.206527] lock(&container->group_lock);
[ 5102.211191]
*** DEADLOCK ***
[ 5102.217796] May be due to missing lock nesting notation
[ 5102.225370] 3 locks held by qemu-system-x86/1620:
[ 5102.230618] #0: (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280
[ 5102.241482] #1: (&(&iommu->notifier)->rwsem){++++..}, at: [<ffffffff810de775>] __blocking_notifier_call_chain+0x35/0x70
[ 5102.253713] #2: (&vgpu->vdev.cache_lock){+.+...}, at: [<ffffffff8157b007>] intel_vgpu_iommu_notifier+0x77/0x120
[ 5102.265163]
stack backtrace:
[ 5102.270022] CPU: 5 PID: 1620 Comm: qemu-system-x86 Not tainted 4.12.0-rc4+ #3
[ 5102.277991] Hardware name: Intel Corporation S1200RP/S1200RP, BIOS S1200RP.86B.03.01.APER.061220151418 06/12/2015
[ 5102.289445] Call Trace:
[ 5102.292175] dump_stack+0x85/0xc7
[ 5102.295871] validate_chain.isra.21+0x9da/0xaf0
[ 5102.300925] __lock_acquire+0x405/0x820
[ 5102.305202] lock_acquire+0xc7/0x220
[ 5102.309191] ? vfio_unpin_pages+0x96/0xf0
[ 5102.313666] down_read+0x2b/0x50
[ 5102.317259] ? vfio_unpin_pages+0x96/0xf0
[ 5102.321732] vfio_unpin_pages+0x96/0xf0
[ 5102.326024] intel_vgpu_iommu_notifier+0xe5/0x120
[ 5102.331283] notifier_call_chain+0x4a/0x70
[ 5102.335851] __blocking_notifier_call_chain+0x4d/0x70
[ 5102.341490] blocking_notifier_call_chain+0x16/0x20
[ 5102.346935] vfio_iommu_type1_ioctl+0x87b/0x920
[ 5102.351994] vfio_fops_unl_ioctl+0x81/0x280
[ 5102.356660] ? __fget+0xf0/0x210
[ 5102.360261] do_vfs_ioctl+0x93/0x6a0
[ 5102.364247] ? __fget+0x111/0x210
[ 5102.367942] SyS_ioctl+0x41/0x70
[ 5102.371542] entry_SYSCALL_64_fastpath+0x1f/0xbe
put the vfio_unpin_pages in a workqueue can fix this.
v2:
- use for style instead of do{}while(1). (Zhenyu)
v3:
- rename gvt_cache_mark to gvt_cache_mark_remove. (Zhenyu)
Fixes: 659643f7d814 ("drm/i915/gvt/kvmgt: add vfio/mdev support to KVMGT")
Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- | drivers/gpu/drm/i915/gvt/gvt.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/kvmgt.c | 55 |
2 files changed, 48 insertions, 10 deletions
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 930732e5c780..8b5876d63624 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -183,6 +183,9 @@ struct intel_vgpu { struct kvm *kvm; struct work_struct release_work; atomic_t released; + struct work_struct unpin_work; + spinlock_t unpin_lock; /* To protect unpin_list */ + struct list_head unpin_list; } vdev; #endif }; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1ae0b4083ce1..56a8d6a7d9c6 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -78,6 +78,7 @@ struct gvt_dma { struct rb_node node; gfn_t gfn; unsigned long iova; + struct list_head list; }; static inline bool handle_valid(unsigned long handle) @@ -166,6 +167,7 @@ static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, new->gfn = gfn; new->iova = iova; + INIT_LIST_HEAD(&new->list); mutex_lock(&vgpu->vdev.cache_lock); while (*link) { @@ -197,26 +199,52 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, kfree(entry); } -static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn) +static void intel_vgpu_unpin_work(struct work_struct *work) { + struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu, + vdev.unpin_work); struct device *dev = mdev_dev(vgpu->vdev.mdev); struct gvt_dma *this; - unsigned long g1; - int rc; + unsigned long gfn; + + for (;;) { + spin_lock(&vgpu->vdev.unpin_lock); + if (list_empty(&vgpu->vdev.unpin_list)) { + spin_unlock(&vgpu->vdev.unpin_lock); + break; + } + this = list_first_entry(&vgpu->vdev.unpin_list, + struct gvt_dma, list); + list_del(&this->list); + spin_unlock(&vgpu->vdev.unpin_lock); + + gfn = this->gfn; + vfio_unpin_pages(dev, &gfn, 1); + kfree(this); + } +} + +static bool gvt_cache_mark_remove(struct intel_vgpu *vgpu, gfn_t gfn) +{ + struct gvt_dma *this; mutex_lock(&vgpu->vdev.cache_lock); this = __gvt_cache_find(vgpu, gfn); if (!this) { mutex_unlock(&vgpu->vdev.cache_lock); - return; + return false; } - - g1 = gfn; gvt_dma_unmap_iova(vgpu, this->iova); - rc = vfio_unpin_pages(dev, &g1, 1); - WARN_ON(rc != 1); - __gvt_cache_remove_entry(vgpu, this); + /* remove this from rb tree */ + rb_erase(&this->node, &vgpu->vdev.cache); mutex_unlock(&vgpu->vdev.cache_lock); + + /* put this to the unpin_list */ + spin_lock(&vgpu->vdev.unpin_lock); + list_move_tail(&this->list, &vgpu->vdev.unpin_list); + spin_unlock(&vgpu->vdev.unpin_lock); + + return true; } static void gvt_cache_init(struct intel_vgpu *vgpu) @@ -453,6 +481,9 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) } INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work); + INIT_WORK(&vgpu->vdev.unpin_work, intel_vgpu_unpin_work); + spin_lock_init(&vgpu->vdev.unpin_lock); + INIT_LIST_HEAD(&vgpu->vdev.unpin_list); vgpu->vdev.mdev = mdev; mdev_set_drvdata(mdev, vgpu); @@ -482,6 +513,7 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, struct intel_vgpu *vgpu = container_of(nb, struct intel_vgpu, vdev.iommu_notifier); + bool sched_unmap = false; if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { struct vfio_iommu_type1_dma_unmap *unmap = data; @@ -491,7 +523,10 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, end_gfn = gfn + unmap->size / PAGE_SIZE; while (gfn < end_gfn) - gvt_cache_remove(vgpu, gfn++); + sched_unmap |= gvt_cache_mark_remove(vgpu, gfn++); + + if (sched_unmap) + schedule_work(&vgpu->vdev.unpin_work); } return NOTIFY_OK; |