summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDoug Ledford <dledford@redhat.com>2016-04-28 22:16:32 +0300
committerDoug Ledford <dledford@redhat.com>2016-04-28 22:16:32 +0300
commite29bff46b9b8663e1789fbcd65d0ba05a52f08af (patch)
treecce17191cf20c60dc917c62f01d96f6b571a47d4
parentd53e181c85effdfdec97a760622330626c922d81 (diff)
parente6bd18f57aad1a2d1ef40e646d03ed0f2515c9e3 (diff)
downloadlinux-e29bff46b9b8663e1789fbcd65d0ba05a52f08af.tar.xz
Merge branch 'k.o/for-4.6-rc' into testing/4.6
-rw-r--r--MAINTAINERS4
-rw-r--r--drivers/infiniband/core/ucm.c4
-rw-r--r--drivers/infiniband/core/ucma.c3
-rw-r--r--drivers/infiniband/core/uverbs_main.c5
-rw-r--r--drivers/infiniband/core/verbs.c3
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c24
-rw-r--r--drivers/infiniband/hw/mlx5/main.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c5
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c4
-rw-r--r--drivers/staging/rdma/hfi1/TODO2
-rw-r--r--drivers/staging/rdma/hfi1/file_ops.c91
-rw-r--r--drivers/staging/rdma/hfi1/mmu_rb.c40
-rw-r--r--drivers/staging/rdma/hfi1/mmu_rb.h3
-rw-r--r--drivers/staging/rdma/hfi1/qp.c2
-rw-r--r--drivers/staging/rdma/hfi1/user_exp_rcv.c11
-rw-r--r--drivers/staging/rdma/hfi1/user_sdma.c33
-rw-r--r--include/linux/mlx5/device.h11
-rw-r--r--include/rdma/ib.h16
21 files changed, 171 insertions, 98 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 849133608da7..c8025946eaae 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6027,7 +6027,7 @@ F: include/scsi/*iscsi*
ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
M: Or Gerlitz <ogerlitz@mellanox.com>
-M: Sagi Grimberg <sagig@mellanox.com>
+M: Sagi Grimberg <sagi@grimberg.me>
M: Roi Dayan <roid@mellanox.com>
L: linux-rdma@vger.kernel.org
S: Supported
@@ -6037,7 +6037,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/
F: drivers/infiniband/ulp/iser/
ISCSI EXTENSIONS FOR RDMA (ISER) TARGET
-M: Sagi Grimberg <sagig@mellanox.com>
+M: Sagi Grimberg <sagi@grimberg.me>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master
L: linux-rdma@vger.kernel.org
L: target-devel@vger.kernel.org
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 4a9aa0433b07..7713ef089c3c 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -48,6 +48,7 @@
#include <asm/uaccess.h>
+#include <rdma/ib.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_user_cm.h>
#include <rdma/ib_marshall.h>
@@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
struct ib_ucm_cmd_hdr hdr;
ssize_t result;
+ if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ return -EACCES;
+
if (len < sizeof(hdr))
return -EINVAL;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index dd3bcceadfde..c0f3826abb30 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
struct rdma_ucm_cmd_hdr hdr;
ssize_t ret;
+ if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ return -EACCES;
+
if (len < sizeof(hdr))
return -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 28ba2cc81535..31f422a70623 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -48,6 +48,8 @@
#include <asm/uaccess.h>
+#include <rdma/ib.h>
+
#include "uverbs.h"
MODULE_AUTHOR("Roland Dreier");
@@ -709,6 +711,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
int srcu_key;
ssize_t ret;
+ if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ return -EACCES;
+
if (count < sizeof hdr)
return -EINVAL;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 15b8adbf39c0..b65b3541e732 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1860,6 +1860,7 @@ EXPORT_SYMBOL(ib_drain_rq);
void ib_drain_qp(struct ib_qp *qp)
{
ib_drain_sq(qp);
- ib_drain_rq(qp);
+ if (!qp->srq)
+ ib_drain_rq(qp);
}
EXPORT_SYMBOL(ib_drain_qp);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 42a7b8952d13..3234a8be16f6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1390,6 +1390,8 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
dev->ibdev.iwcm->get_qp = iwch_get_qp;
+ memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name,
+ sizeof(dev->ibdev.iwcm->ifname));
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index b4eeb783573c..b0b955724458 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -162,7 +162,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
&cq->bar2_qid,
user ? &cq->bar2_pa : NULL);
- if (user && !cq->bar2_va) {
+ if (user && !cq->bar2_pa) {
pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
pci_name(rdev->lldi.pdev), cq->cqid);
ret = -EINVAL;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 124682dc5709..7574f394fdac 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -580,6 +580,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
dev->ibdev.iwcm->get_qp = c4iw_get_qp;
+ memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
+ sizeof(dev->ibdev.iwcm->ifname));
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index e17fb5d5e033..e8993e49b8b3 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -185,6 +185,10 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
if (pbar2_pa)
*pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
+
+ if (is_t4(rdev->lldi.adapter_type))
+ return NULL;
+
return rdev->bar2_kva + bar2_qoffset;
}
@@ -270,7 +274,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
/*
* User mode must have bar2 access.
*/
- if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) {
+ if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
goto free_dma;
@@ -1895,13 +1899,27 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
void c4iw_drain_sq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+ unsigned long flag;
+ bool need_to_wait;
- wait_for_completion(&qp->sq_drained);
+ spin_lock_irqsave(&qp->lock, flag);
+ need_to_wait = !t4_sq_empty(&qp->wq);
+ spin_unlock_irqrestore(&qp->lock, flag);
+
+ if (need_to_wait)
+ wait_for_completion(&qp->sq_drained);
}
void c4iw_drain_rq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+ unsigned long flag;
+ bool need_to_wait;
+
+ spin_lock_irqsave(&qp->lock, flag);
+ need_to_wait = !t4_rq_empty(&qp->wq);
+ spin_unlock_irqrestore(&qp->lock, flag);
- wait_for_completion(&qp->rq_drained);
+ if (need_to_wait)
+ wait_for_completion(&qp->rq_drained);
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 99eb1c1a3b7b..6ad0489cb3c5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -530,7 +530,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
sizeof(struct mlx5_wqe_ctrl_seg)) /
sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
- props->max_sge_rd = props->max_sge;
+ props->max_sge_rd = MLX5_MAX_SGE_RD;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index e449e394963f..24f4a782e0f4 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -45,6 +45,8 @@
#include <linux/export.h>
#include <linux/uio.h>
+#include <rdma/ib.h>
+
#include "qib.h"
#include "qib_common.h"
#include "qib_user_sdma.h"
@@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
ssize_t ret = 0;
void *dest;
+ if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+ return -EACCES;
+
if (count < sizeof(cmd.type)) {
ret = -EINVAL;
goto bail;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index bd82a6948dc8..a9e3bcc522c4 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1637,9 +1637,9 @@ bail:
spin_unlock_irqrestore(&qp->s_hlock, flags);
if (nreq) {
if (call_send)
- rdi->driver_f.schedule_send_no_lock(qp);
- else
rdi->driver_f.do_send(qp);
+ else
+ rdi->driver_f.schedule_send_no_lock(qp);
}
return err;
}
diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO
index 05de0dad8762..4c6f1d7d2eaf 100644
--- a/drivers/staging/rdma/hfi1/TODO
+++ b/drivers/staging/rdma/hfi1/TODO
@@ -3,4 +3,4 @@ July, 2015
- Remove unneeded file entries in sysfs
- Remove software processing of IB protocol and place in library for use
by qib, ipath (if still present), hfi1, and eventually soft-roce
-
+- Replace incorrect uAPI
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c
index 8396dc5fb6c1..c1c5bf82addb 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c
@@ -49,6 +49,8 @@
#include <linux/vmalloc.h>
#include <linux/io.h>
+#include <rdma/ib.h>
+
#include "hfi.h"
#include "pio.h"
#include "device.h"
@@ -190,6 +192,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
int uctxt_required = 1;
int must_be_root = 0;
+ /* FIXME: This interface cannot continue out of staging */
+ if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+ return -EACCES;
+
if (count < sizeof(cmd)) {
ret = -EINVAL;
goto bail;
@@ -791,15 +797,16 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
dd->rcd[uctxt->ctxt] = NULL;
+
+ hfi1_user_exp_rcv_free(fdata);
+ hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+
uctxt->rcvwait_to = 0;
uctxt->piowait_to = 0;
uctxt->rcvnowait = 0;
uctxt->pionowait = 0;
uctxt->event_flags = 0;
- hfi1_user_exp_rcv_free(fdata);
- hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
-
hfi1_stats.sps_ctxts--;
if (++dd->freectxts == dd->num_user_contexts)
aspm_enable_all(dd);
@@ -1127,27 +1134,13 @@ bail:
static int user_init(struct file *fp)
{
- int ret;
unsigned int rcvctrl_ops = 0;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
/* make sure that the context has already been setup */
- if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) {
- ret = -EFAULT;
- goto done;
- }
-
- /*
- * Subctxts don't need to initialize anything since master
- * has done it.
- */
- if (fd->subctxt) {
- ret = wait_event_interruptible(uctxt->wait, !test_bit(
- HFI1_CTXT_MASTER_UNINIT,
- &uctxt->event_flags));
- goto expected;
- }
+ if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
+ return -EFAULT;
/* initialize poll variables... */
uctxt->urgent = 0;
@@ -1202,19 +1195,7 @@ static int user_init(struct file *fp)
wake_up(&uctxt->wait);
}
-expected:
- /*
- * Expected receive has to be setup for all processes (including
- * shared contexts). However, it has to be done after the master
- * context has been fully configured as it depends on the
- * eager/expected split of the RcvArray entries.
- * Setting it up here ensures that the subcontexts will be waiting
- * (due to the above wait_event_interruptible() until the master
- * is setup.
- */
- ret = hfi1_user_exp_rcv_init(fp);
-done:
- return ret;
+ return 0;
}
static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
@@ -1261,7 +1242,7 @@ static int setup_ctxt(struct file *fp)
int ret = 0;
/*
- * Context should be set up only once (including allocation and
+ * Context should be set up only once, including allocation and
* programming of eager buffers. This is done if context sharing
* is not requested or by the master process.
*/
@@ -1282,10 +1263,29 @@ static int setup_ctxt(struct file *fp)
if (ret)
goto done;
}
+ } else {
+ ret = wait_event_interruptible(uctxt->wait, !test_bit(
+ HFI1_CTXT_MASTER_UNINIT,
+ &uctxt->event_flags));
+ if (ret)
+ goto done;
}
+
ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
if (ret)
goto done;
+ /*
+ * Expected receive has to be setup for all processes (including
+ * shared contexts). However, it has to be done after the master
+ * context has been fully configured as it depends on the
+ * eager/expected split of the RcvArray entries.
+ * Setting it up here ensures that the subcontexts will be waiting
+ * (due to the above wait_event_interruptible() until the master
+ * is setup.
+ */
+ ret = hfi1_user_exp_rcv_init(fp);
+ if (ret)
+ goto done;
set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
done:
@@ -1565,29 +1565,8 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
{
struct hfi1_devdata *dd = filp->private_data;
- switch (whence) {
- case SEEK_SET:
- break;
- case SEEK_CUR:
- offset += filp->f_pos;
- break;
- case SEEK_END:
- offset = ((dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE) -
- offset;
- break;
- default:
- return -EINVAL;
- }
-
- if (offset < 0)
- return -EINVAL;
-
- if (offset >= (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE)
- return -EINVAL;
-
- filp->f_pos = offset;
-
- return filp->f_pos;
+ return fixed_size_llseek(filp, offset, whence,
+ (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE);
}
/* NOTE: assumes unsigned long is 8 bytes */
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c
index c7ad0164ea9a..b3f0682a36c9 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.c
+++ b/drivers/staging/rdma/hfi1/mmu_rb.c
@@ -71,6 +71,7 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *,
struct mm_struct *,
unsigned long, unsigned long);
static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
+ struct mm_struct *,
unsigned long, unsigned long);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
@@ -137,7 +138,7 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
rbnode = rb_entry(node, struct mmu_rb_node, node);
rb_erase(node, root);
if (handler->ops->remove)
- handler->ops->remove(root, rbnode, false);
+ handler->ops->remove(root, rbnode, NULL);
}
}
@@ -176,7 +177,7 @@ unlock:
return ret;
}
-/* Caller must host handler lock */
+/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
unsigned long addr,
unsigned long len)
@@ -200,15 +201,21 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
+/* Caller must *not* hold handler lock. */
static void __mmu_rb_remove(struct mmu_rb_handler *handler,
- struct mmu_rb_node *node, bool arg)
+ struct mmu_rb_node *node, struct mm_struct *mm)
{
+ unsigned long flags;
+
/* Validity of handler and node pointers has been checked by caller. */
hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
node->len);
+ spin_lock_irqsave(&handler->lock, flags);
__mmu_int_rb_remove(node, handler->root);
+ spin_unlock_irqrestore(&handler->lock, flags);
+
if (handler->ops->remove)
- handler->ops->remove(handler->root, node, arg);
+ handler->ops->remove(handler->root, node, mm);
}
struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
@@ -231,14 +238,11 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
{
struct mmu_rb_handler *handler = find_mmu_handler(root);
- unsigned long flags;
if (!handler || !node)
return;
- spin_lock_irqsave(&handler->lock, flags);
- __mmu_rb_remove(handler, node, false);
- spin_unlock_irqrestore(&handler->lock, flags);
+ __mmu_rb_remove(handler, node, NULL);
}
static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
@@ -260,7 +264,7 @@ unlock:
static inline void mmu_notifier_page(struct mmu_notifier *mn,
struct mm_struct *mm, unsigned long addr)
{
- mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE);
+ mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE);
}
static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
@@ -268,25 +272,31 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
unsigned long start,
unsigned long end)
{
- mmu_notifier_mem_invalidate(mn, start, end);
+ mmu_notifier_mem_invalidate(mn, mm, start, end);
}
static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
+ struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct mmu_rb_handler *handler =
container_of(mn, struct mmu_rb_handler, mn);
struct rb_root *root = handler->root;
- struct mmu_rb_node *node;
+ struct mmu_rb_node *node, *ptr = NULL;
unsigned long flags;
spin_lock_irqsave(&handler->lock, flags);
- for (node = __mmu_int_rb_iter_first(root, start, end - 1); node;
- node = __mmu_int_rb_iter_next(node, start, end - 1)) {
+ for (node = __mmu_int_rb_iter_first(root, start, end - 1);
+ node; node = ptr) {
+ /* Guard against node removal. */
+ ptr = __mmu_int_rb_iter_next(node, start, end - 1);
hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
node->addr, node->len);
- if (handler->ops->invalidate(root, node))
- __mmu_rb_remove(handler, node, true);
+ if (handler->ops->invalidate(root, node)) {
+ spin_unlock_irqrestore(&handler->lock, flags);
+ __mmu_rb_remove(handler, node, mm);
+ spin_lock_irqsave(&handler->lock, flags);
+ }
}
spin_unlock_irqrestore(&handler->lock, flags);
}
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h
index f8523fdb8a18..19a306e83c7d 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.h
+++ b/drivers/staging/rdma/hfi1/mmu_rb.h
@@ -59,7 +59,8 @@ struct mmu_rb_node {
struct mmu_rb_ops {
bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
int (*insert)(struct rb_root *, struct mmu_rb_node *);
- void (*remove)(struct rb_root *, struct mmu_rb_node *, bool);
+ void (*remove)(struct rb_root *, struct mmu_rb_node *,
+ struct mm_struct *);
int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
};
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index 29a5ad28019b..dc9119e1b458 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c
@@ -519,10 +519,12 @@ static void iowait_sdma_drained(struct iowait *wait)
* do the flush work until that QP's
* sdma work has finished.
*/
+ spin_lock(&qp->s_lock);
if (qp->s_flags & RVT_S_WAIT_DMA) {
qp->s_flags &= ~RVT_S_WAIT_DMA;
hfi1_schedule_send(qp);
}
+ spin_unlock(&qp->s_lock);
}
/**
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c
index 0861e095df8d..8bd56d5c783d 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.c
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c
@@ -87,7 +87,8 @@ static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
static int set_rcvarray_entry(struct file *, unsigned long, u32,
struct tid_group *, struct page **, unsigned);
static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *,
+ struct mm_struct *);
static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
struct tid_pageset *, unsigned, u16, struct page **,
@@ -254,6 +255,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_group *grp, *gptr;
+ if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
+ return 0;
/*
* The notifier would have been removed when the process'es mm
* was freed.
@@ -899,7 +902,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
if (HFI1_CAP_IS_USET(TID_UNMAP))
- mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false);
+ mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL);
else
hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
@@ -965,7 +968,7 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
continue;
if (HFI1_CAP_IS_USET(TID_UNMAP))
mmu_rb_remove(&fd->tid_rb_root,
- &node->mmu, false);
+ &node->mmu, NULL);
else
hfi1_mmu_rb_remove(&fd->tid_rb_root,
&node->mmu);
@@ -1032,7 +1035,7 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
}
static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
- bool notifier)
+ struct mm_struct *mm)
{
struct hfi1_filedata *fdata =
container_of(root, struct hfi1_filedata, tid_rb_root);
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c
index ab6b6a42000f..d53a659548e0 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c
@@ -278,7 +278,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
static void user_sdma_free_request(struct user_sdma_request *, bool);
static int pin_vector_pages(struct user_sdma_request *,
struct user_sdma_iovec *);
-static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned);
+static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
+ unsigned);
static int check_header_template(struct user_sdma_request *,
struct hfi1_pkt_header *, u32, u32);
static int set_txreq_header(struct user_sdma_request *,
@@ -299,7 +300,8 @@ static int defer_packet_queue(
static void activate_packet_queue(struct iowait *, int);
static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *,
+ struct mm_struct *);
static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static struct mmu_rb_ops sdma_rb_ops = {
@@ -1063,8 +1065,10 @@ static int pin_vector_pages(struct user_sdma_request *req,
rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
(unsigned long)iovec->iov.iov_base,
iovec->iov.iov_len);
- if (rb_node)
+ if (rb_node && !IS_ERR(rb_node))
node = container_of(rb_node, struct sdma_mmu_node, rb);
+ else
+ rb_node = NULL;
if (!node) {
node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -1107,7 +1111,8 @@ retry:
goto bail;
}
if (pinned != npages) {
- unpin_vector_pages(current->mm, pages, pinned);
+ unpin_vector_pages(current->mm, pages, node->npages,
+ pinned);
ret = -EFAULT;
goto bail;
}
@@ -1147,9 +1152,9 @@ bail:
}
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
- unsigned npages)
+ unsigned start, unsigned npages)
{
- hfi1_release_user_pages(mm, pages, npages, 0);
+ hfi1_release_user_pages(mm, pages + start, npages, 0);
kfree(pages);
}
@@ -1502,7 +1507,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
&req->pq->sdma_rb_root,
(unsigned long)req->iovs[i].iov.iov_base,
req->iovs[i].iov.iov_len);
- if (!mnode)
+ if (!mnode || IS_ERR(mnode))
continue;
node = container_of(mnode, struct sdma_mmu_node, rb);
@@ -1547,7 +1552,7 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
}
static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
- bool notifier)
+ struct mm_struct *mm)
{
struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb);
@@ -1557,14 +1562,20 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
node->pq->n_locked -= node->npages;
spin_unlock(&node->pq->evict_lock);
- unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
+ /*
+ * If mm is set, we are being called by the MMU notifier and we
+ * should not pass a mm_struct to unpin_vector_page(). This is to
+ * prevent a deadlock when hfi1_release_user_pages() attempts to
+ * take the mmap_sem, which the MMU notifier has already taken.
+ */
+ unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0,
node->npages);
/*
* If called by the MMU notifier, we have to adjust the pinned
* page count ourselves.
*/
- if (notifier)
- current->mm->pinned_vm -= node->npages;
+ if (mm)
+ mm->pinned_vm -= node->npages;
kfree(node);
}
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 8156e3c9239c..b3575f392492 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -392,6 +392,17 @@ enum {
MLX5_CAP_OFF_CMDIF_CSUM = 46,
};
+enum {
+ /*
+ * Max wqe size for rdma read is 512 bytes, so this
+ * limits our max_sge_rd as the wqe needs to fit:
+ * - ctrl segment (16 bytes)
+ * - rdma segment (16 bytes)
+ * - scatter elements (16 bytes each)
+ */
+ MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16
+};
+
struct mlx5_inbox_hdr {
__be16 opcode;
u8 rsvd[4];
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index cf8f9e700e48..a6b93706b0fc 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -34,6 +34,7 @@
#define _RDMA_IB_H
#include <linux/types.h>
+#include <linux/sched.h>
struct ib_addr {
union {
@@ -86,4 +87,19 @@ struct sockaddr_ib {
__u64 sib_scope_id;
};
+/*
+ * The IB interfaces that use write() as bi-directional ioctl() are
+ * fundamentally unsafe, since there are lots of ways to trigger "write()"
+ * calls from various contexts with elevated privileges. That includes the
+ * traditional suid executable error message writes, but also various kernel
+ * interfaces that can write to file descriptors.
+ *
+ * This function provides protection for the legacy API by restricting the
+ * calling context.
+ */
+static inline bool ib_safe_file_access(struct file *filp)
+{
+ return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS);
+}
+
#endif /* _RDMA_IB_H */