summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/file_ops.c
diff options
context:
space:
mode:
author Michael J. Ruhl <michael.j.ruhl@intel.com> 2017-06-10 02:00:19 +0300
committer Doug Ledford <dledford@redhat.com> 2017-06-27 23:58:13 +0300
commit f683c80ca68e087b55c6f9ab6ca6beb88ebc6d69 (patch)
tree 7c645212a5c6642da3b2cf872e7bfbb6da7e2606 /drivers/infiniband/hw/hfi1/file_ops.c
parent fe4e74eeb24286c730672e776ac4c2c3caa19137 (diff)
download linux-f683c80ca68e087b55c6f9ab6ca6beb88ebc6d69.tar.xz
IB/hfi1: Resolve kernel panics by reference counting receive contexts
Base receive contexts can be used by sub contexts. Because of this, resources for the context cannot be completely freed until all sub contexts are done using the base context. Introduce a reference count so that the base receive context can be freed only when all sub contexts are done with it. Use the provided function call for setting default send context integrity rather than the manual method. The cleanup path does not set all variables back to NULL after freeing resources. Since the cleanup code can get called more than once (e.g. during context close and on the error path), it is necessary to make sure that all the variables are NULLed. Possible crashes are: BUG: unable to handle kernel paging request at 0000000001908900 IP: read_csr+0x24/0x30 [hfi1] RIP: 0010:read_csr+0x24/0x30 [hfi1] Call Trace: sc_disable+0x40/0x110 [hfi1] hfi1_file_close+0x16f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 or kernel BUG at mm/slub.c:3877! RIP: 0010:kfree+0x14f/0x170 Call Trace: hfi1_free_ctxtdata+0x19a/0x2b0 [hfi1] ? hfi1_user_exp_rcv_grp_free+0x73/0x80 [hfi1] hfi1_file_close+0x20f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 Fixes: Commit 62239fc6e554 ("IB/hfi1: Clean up on context initialization failure") Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com> Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1/file_ops.c')
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c39
1 files changed, 25 insertions, 14 deletions
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 2dd8758f0644..bbf80b1dd9d9 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -774,6 +774,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
*ev = 0;
__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
+ fdata->uctxt = NULL;
+ hfi1_rcd_put(uctxt); /* fdata reference */
if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
mutex_unlock(&hfi1_mutex);
goto done;
@@ -794,16 +796,15 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
/* Clear the context's J_KEY */
hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
/*
- * Reset context integrity checks to default.
- * (writes to CSRs probably belong in chip.c)
+ * If a send context is allocated, reset context integrity
+ * checks to default and disable the send context.
*/
- write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
- hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
- sc_disable(uctxt->sc);
+ if (uctxt->sc) {
+ set_pio_integrity(uctxt->sc);
+ sc_disable(uctxt->sc);
+ }
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
- dd->rcd[uctxt->ctxt] = NULL;
-
hfi1_free_ctxt_rcv_groups(uctxt);
hfi1_clear_ctxt_pkey(dd, uctxt);
@@ -816,8 +817,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
hfi1_stats.sps_ctxts--;
if (++dd->freectxts == dd->num_user_contexts)
aspm_enable_all(dd);
+
+ /* _rcd_put() should be done after releasing mutex */
+ dd->rcd[uctxt->ctxt] = NULL;
mutex_unlock(&hfi1_mutex);
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_rcd_put(uctxt); /* dd reference */
done:
mmdrop(fdata->mm);
kobject_put(&dd->kobj);
@@ -887,16 +891,17 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
HFI1_CTXT_BASE_UNINIT,
&fd->uctxt->event_flags));
- if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
- clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
- return -ENOMEM;
- }
+ if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
+ ret = -ENOMEM;
+
/* The only thing a sub context needs is the user_xxx stuff */
if (!ret)
ret = init_user_ctxt(fd);
- if (ret)
+ if (ret) {
clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+ hfi1_rcd_put(fd->uctxt);
+ }
} else if (!ret) {
ret = setup_base_ctxt(fd);
if (fd->uctxt->subctxt_cnt) {
@@ -961,6 +966,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd,
fd->uctxt = uctxt;
fd->subctxt = subctxt;
+
+ hfi1_rcd_get(uctxt);
__set_bit(fd->subctxt, uctxt->in_use_ctxts);
return 1;
@@ -1069,11 +1076,14 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
aspm_disable_all(dd);
fd->uctxt = uctxt;
+ /* Count the reference for the fd */
+ hfi1_rcd_get(uctxt);
+
return 0;
ctxdata_free:
dd->rcd[ctxt] = NULL;
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_rcd_put(uctxt);
return ret;
}
@@ -1273,6 +1283,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd)
return 0;
setup_failed:
+ /* Call _free_ctxtdata, not _rcd_put(). We still need the context. */
hfi1_free_ctxtdata(dd, uctxt);
return ret;
}