summary | refs | log | tree | commit | diff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/core/addr.c | 4
-rw-r--r-- drivers/infiniband/core/cm.c | 5
-rw-r--r-- drivers/infiniband/core/uverbs_ioctl.c | 2
-rw-r--r-- drivers/infiniband/hw/cxgb4/cm.c | 5
-rw-r--r-- drivers/infiniband/hw/hfi1/affinity.c | 21
-rw-r--r-- drivers/infiniband/hw/hfi1/hfi.h | 1
-rw-r--r-- drivers/infiniband/hw/hfi1/init.c | 10
-rw-r--r-- drivers/infiniband/hw/hfi1/netdev_rx.c | 3
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4
-rw-r--r-- drivers/infiniband/hw/mlx5/devx.c | 6
-rw-r--r-- drivers/infiniband/hw/mlx5/odp.c | 2
-rw-r--r-- drivers/infiniband/hw/mlx5/qp.c | 18
-rw-r--r-- drivers/infiniband/hw/qedr/verbs.c | 3
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c | 3
-rw-r--r-- drivers/infiniband/sw/rxe/Kconfig | 1
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_comp.c | 55
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_net.c | 10
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_recv.c | 59
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 77
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs-clt.h | 1
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs-pri.h | 1
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 4
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs.h | 13
23 files changed, 183 insertions, 125 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0abce004a959..65e3e7df8a4b 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -76,7 +76,9 @@ static struct workqueue_struct *addr_wq;
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
- .len = sizeof(struct rdma_nla_ls_gid)},
+ .len = sizeof(struct rdma_nla_ls_gid),
+ .validation_type = NLA_VALIDATE_MIN,
+ .min = sizeof(struct rdma_nla_ls_gid)},
};
static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index be996dba040c..3d194bb60840 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3651,6 +3651,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param)
{
struct ib_mad_send_buf *msg;
+ unsigned long flags;
int ret;
lockdep_assert_held(&cm_id_priv->lock);
@@ -3676,12 +3677,12 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
return ret;
}
cm_id_priv->id.state = IB_CM_IDLE;
- spin_lock_irq(&cm.lock);
+ spin_lock_irqsave(&cm.lock, flags);
if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
}
- spin_unlock_irq(&cm.lock);
+ spin_unlock_irqrestore(&cm.lock, flags);
return 0;
}
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index e47c5949013f..ff047eb024ab 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -91,7 +91,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
}
/**
- * uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * _uverbs_alloc() - Quickly allocate memory for use with a bundle
* @bundle: The bundle
* @size: Number of bytes to allocate
* @flags: Allocator flags
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8769e7aa097f..e42c812e74c3 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3610,13 +3610,14 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
ep->com.local_addr.ss_family == AF_INET) {
err = cxgb4_remove_server_filter(
ep->com.dev->rdev.lldi.ports[0], ep->stid,
- ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+ ep->com.dev->rdev.lldi.rxq_ids[0], false);
} else {
struct sockaddr_in6 *sin6;
c4iw_init_wr_wait(ep->com.wr_waitp);
err = cxgb4_remove_server(
ep->com.dev->rdev.lldi.ports[0], ep->stid,
- ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+ ep->com.dev->rdev.lldi.rxq_ids[0],
+ ep->com.local_addr.ss_family == AF_INET6);
if (err)
goto done;
err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 2a91b8d95e12..04b1e8f021f6 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -632,22 +632,11 @@ static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
*/
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
- int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
int curr_cpu, possible, i, ret;
bool new_entry = false;
- /*
- * If the BIOS does not have the NUMA node information set, select
- * NUMA 0 so we get consistent performance.
- */
- if (node < 0) {
- dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
- node = 0;
- }
- dd->node = node;
-
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
@@ -660,7 +649,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
* create an entry in the global affinity structure and initialize it.
*/
if (!entry) {
- entry = node_affinity_allocate(node);
+ entry = node_affinity_allocate(dd->node);
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
@@ -751,6 +740,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (new_entry)
node_affinity_add_tail(entry);
+ dd->affinity_entry = entry;
mutex_unlock(&node_affinity.lock);
return 0;
@@ -766,10 +756,9 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
{
struct hfi1_affinity_node *entry;
- if (dd->node < 0)
- return;
-
mutex_lock(&node_affinity.lock);
+ if (!dd->affinity_entry)
+ goto unlock;
entry = node_affinity_lookup(dd->node);
if (!entry)
goto unlock;
@@ -780,8 +769,8 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
*/
_dev_comp_vect_cpu_mask_clean_up(dd, entry);
unlock:
+ dd->affinity_entry = NULL;
mutex_unlock(&node_affinity.lock);
- dd->node = NUMA_NO_NODE;
}
/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index e09e8244a94c..2a9a040569eb 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1409,6 +1409,7 @@ struct hfi1_devdata {
spinlock_t irq_src_lock;
int vnic_num_vports;
struct net_device *dummy_netdev;
+ struct hfi1_affinity_node *affinity_entry;
/* Keeps track of IPoIB RSM rule users */
atomic_t ipoib_rsm_usr_num;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index cb7ad1288821..786c6316273f 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1277,7 +1277,6 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
dd->pport = (struct hfi1_pportdata *)(dd + 1);
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
- dd->node = NUMA_NO_NODE;
ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
GFP_KERNEL);
@@ -1287,6 +1286,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+ /*
+ * If the BIOS does not have the NUMA node information set, select
+ * NUMA 0 so we get consistent performance.
+ */
+ dd->node = pcibus_to_node(pdev->bus);
+ if (dd->node == NUMA_NO_NODE) {
+ dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+ dd->node = 0;
+ }
/*
* Initialize all locks for the device. This needs to be as early as
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
index 1fb6e1a0e4e1..1bcab992ac26 100644
--- a/drivers/infiniband/hw/hfi1/netdev_rx.c
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -173,8 +173,7 @@ u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
return 0;
}
- cpumask_and(node_cpu_mask, cpu_mask,
- cpumask_of_node(pcibus_to_node(dd->pcidev->bus)));
+ cpumask_and(node_cpu_mask, cpu_mask, cpumask_of_node(dd->node));
available_cpus = cpumask_weight(node_cpu_mask);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c3934abeb260..ce26f97b2ca2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1194,8 +1194,10 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type)
upper_32_bits(dma));
roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG,
(u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
- roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0);
+
+ /* Make sure to write tail first and then head */
roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0);
+ roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0);
} else {
roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_L_REG, (u32)dma);
roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG,
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index ebc2a4355fa5..07b8350929cd 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1116,7 +1116,7 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
case MLX5_CMD_OP_CREATE_MKEY:
MLX5_SET(destroy_mkey_in, din, opcode,
MLX5_CMD_OP_DESTROY_MKEY);
- MLX5_SET(destroy_mkey_in, in, mkey_index, *obj_id);
+ MLX5_SET(destroy_mkey_in, din, mkey_index, *obj_id);
break;
case MLX5_CMD_OP_CREATE_CQ:
MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
@@ -2073,8 +2073,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
num_alloc_xa_entries++;
event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
- if (!event_sub)
+ if (!event_sub) {
+ err = -ENOMEM;
goto err;
+ }
list_add_tail(&event_sub->event_list, &sub_list);
uverbs_uobject_get(&ev_file->uobj);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 374698186662..b103555b1f5d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1082,7 +1082,7 @@ end:
return ret ? ret : npages;
}
-/**
+/*
* Parse a series of data segments for page fault handling.
*
* @dev: Pointer to mlx5 IB device
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ec4b3f6a8222..f5a52a6fae43 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1078,7 +1078,7 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,
qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
MLX5_SET(qpc, qpc, uar_page, uar_index);
- MLX5_SET(qpc, qpc, ts_format, MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT);
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
/* Set "fast registration enabled" for all kernel QPs */
@@ -1188,7 +1188,8 @@ static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
}
return MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING;
}
- return MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT;
+ return fr_supported ? MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING :
+ MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT;
}
static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
@@ -1206,7 +1207,8 @@ static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
}
return MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING;
}
- return MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT;
+ return fr_supported ? MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING :
+ MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT;
}
static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
@@ -1217,7 +1219,8 @@ static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
MLX5_CAP_ROCE(dev->mdev, qp_ts_format) ==
MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
- int ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;
+ int ts_format = fr_supported ? MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING :
+ MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;
if (recv_cq &&
recv_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION)
@@ -1930,6 +1933,7 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
MLX5_SET(qpc, qpc, cd_slave_receive, 1);
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ);
MLX5_SET(qpc, qpc, no_sq, 1);
MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
@@ -4873,6 +4877,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct mlx5_ib_dev *dev;
int has_net_offloads;
__be64 *rq_pas0;
+ int ts_format;
void *in;
void *rqc;
void *wq;
@@ -4881,6 +4886,10 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
dev = to_mdev(pd->device);
+ ts_format = get_rq_ts_format(dev, to_mcq(init_attr->cq));
+ if (ts_format < 0)
+ return ts_format;
+
inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
@@ -4890,6 +4899,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
MLX5_SET(rqc, rqc, mem_rq_type,
MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, ts_format, ts_format);
MLX5_SET(rqc, rqc, user_index, rwq->user_index);
MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 0eb6a7a618e0..9ea542270ed4 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1244,7 +1244,8 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
* TGT QP isn't associated with RQ/SQ
*/
if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
- (attrs->qp_type != IB_QPT_XRC_TGT)) {
+ (attrs->qp_type != IB_QPT_XRC_TGT) &&
+ (attrs->qp_type != IB_QPT_XRC_INI)) {
struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
index d399523206c7..29d71267af78 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
@@ -83,7 +83,8 @@ usnic_uiom_interval_node_alloc(long int start, long int last, int ref_cnt,
return interval;
}
-static int interval_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int interval_cmp(void *priv, const struct list_head *a,
+ const struct list_head *b)
{
struct usnic_uiom_interval_node *node_a, *node_b;
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 452149066792..06b8dc5093f7 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -4,6 +4,7 @@ config RDMA_RXE
depends on INET && PCI && INFINIBAND
depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
+ select CRYPTO
select CRYPTO_CRC32
help
This driver implements the InfiniBand RDMA transport over
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index a8ac791a1bb9..17a361b8dbb1 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -547,6 +547,7 @@ int rxe_completer(void *arg)
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
+ int ret = 0;
rxe_add_ref(qp);
@@ -554,7 +555,8 @@ int rxe_completer(void *arg)
qp->req.state == QP_STATE_RESET) {
rxe_drain_resp_pkts(qp, qp->valid &&
qp->req.state == QP_STATE_ERROR);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
}
if (qp->comp.timeout) {
@@ -564,8 +566,10 @@ int rxe_completer(void *arg)
qp->comp.timeout_retry = 0;
}
- if (qp->req.need_retry)
- goto exit;
+ if (qp->req.need_retry) {
+ ret = -EAGAIN;
+ goto done;
+ }
state = COMPST_GET_ACK;
@@ -636,8 +640,6 @@ int rxe_completer(void *arg)
break;
case COMPST_DONE:
- if (pkt)
- free_pkt(pkt);
goto done;
case COMPST_EXIT:
@@ -660,7 +662,8 @@ int rxe_completer(void *arg)
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
case COMPST_ERROR_RETRY:
/* we come here if the retry timer fired and we did
@@ -672,18 +675,18 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted))
- goto exit;
+ if (!wqe || (wqe->state == wqe_state_posted)) {
+ pr_warn("Retry attempted without a valid wqe\n");
+ ret = -EAGAIN;
+ goto done;
+ }
/* if we've started a retry, don't start another
* retry sequence, unless this is a timeout.
*/
if (qp->comp.started_retry &&
- !qp->comp.timeout_retry) {
- if (pkt)
- free_pkt(pkt);
+ !qp->comp.timeout_retry)
goto done;
- }
if (qp->comp.retry_cnt > 0) {
if (qp->comp.retry_cnt != 7)
@@ -704,8 +707,6 @@ int rxe_completer(void *arg)
qp->comp.started_retry = 1;
rxe_run_task(&qp->req.task, 0);
}
- if (pkt)
- free_pkt(pkt);
goto done;
} else {
@@ -726,8 +727,8 @@ int rxe_completer(void *arg)
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
- free_pkt(pkt);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
} else {
rxe_counter_inc(rxe,
RXE_CNT_RNR_RETRY_EXCEEDED);
@@ -740,25 +741,15 @@ int rxe_completer(void *arg)
WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
do_complete(qp, wqe);
rxe_qp_error(qp);
- if (pkt)
- free_pkt(pkt);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
}
}
-exit:
- /* we come here if we are done with processing and want the task to
- * exit from the loop calling us
- */
- WARN_ON_ONCE(skb);
- rxe_drop_ref(qp);
- return -EAGAIN;
-
done:
- /* we come here if we have processed a packet we want the task to call
- * us again to see if there is anything else to do
- */
- WARN_ON_ONCE(skb);
+ if (pkt)
+ free_pkt(pkt);
rxe_drop_ref(qp);
- return 0;
+
+ return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 0701bd1ffd1a..01662727dca0 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -407,14 +407,22 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
return 0;
}
+/* fix up a send packet to match the packets
+ * received from UDP before looping them back
+ */
void rxe_loopback(struct sk_buff *skb)
{
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+
if (skb->protocol == htons(ETH_P_IP))
skb_pull(skb, sizeof(struct iphdr));
else
skb_pull(skb, sizeof(struct ipv6hdr));
- rxe_rcv(skb);
+ if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev)))
+ kfree_skb(skb);
+ else
+ rxe_rcv(skb);
}
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 45d2f711bce2..7a49e27da23a 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -237,8 +237,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
struct rxe_mc_elem *mce;
struct rxe_qp *qp;
union ib_gid dgid;
- struct sk_buff *per_qp_skb;
- struct rxe_pkt_info *per_qp_pkt;
int err;
if (skb->protocol == htons(ETH_P_IP))
@@ -250,10 +248,15 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
/* lookup mcast group corresponding to mgid, takes a ref */
mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
if (!mcg)
- goto err1; /* mcast group not registered */
+ goto drop; /* mcast group not registered */
spin_lock_bh(&mcg->mcg_lock);
+ /* this is unreliable datagram service so we let
+ * failures to deliver a multicast packet to a
+ * single QP happen and just move on and try
+ * the rest of them on the list
+ */
list_for_each_entry(mce, &mcg->qp_list, qp_list) {
qp = mce->qp;
@@ -266,39 +269,47 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
if (err)
continue;
- /* for all but the last qp create a new clone of the
- * skb and pass to the qp. If an error occurs in the
- * checks for the last qp in the list we need to
- * free the skb since it hasn't been passed on to
- * rxe_rcv_pkt() which would free it later.
+ /* for all but the last QP create a new clone of the
+ * skb and pass to the QP. Pass the original skb to
+ * the last QP in the list.
*/
if (mce->qp_list.next != &mcg->qp_list) {
- per_qp_skb = skb_clone(skb, GFP_ATOMIC);
- if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
- kfree_skb(per_qp_skb);
+ struct sk_buff *cskb;
+ struct rxe_pkt_info *cpkt;
+
+ cskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!cskb))
continue;
+
+ if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
+ kfree_skb(cskb);
+ break;
}
+
+ cpkt = SKB_TO_PKT(cskb);
+ cpkt->qp = qp;
+ rxe_add_ref(qp);
+ rxe_rcv_pkt(cpkt, cskb);
} else {
- per_qp_skb = skb;
- /* show we have consumed the skb */
- skb = NULL;
+ pkt->qp = qp;
+ rxe_add_ref(qp);
+ rxe_rcv_pkt(pkt, skb);
+ skb = NULL; /* mark consumed */
}
-
- if (unlikely(!per_qp_skb))
- continue;
-
- per_qp_pkt = SKB_TO_PKT(per_qp_skb);
- per_qp_pkt->qp = qp;
- rxe_add_ref(qp);
- rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
}
spin_unlock_bh(&mcg->mcg_lock);
rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
-err1:
- /* free skb if not consumed */
+ if (likely(!skb))
+ return;
+
+ /* This only occurs if one of the checks fails on the last
+ * QP in the list above
+ */
+
+drop:
kfree_skb(skb);
ib_device_put(&rxe->ib_dev);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 0a08b4b742a3..b74a872387c4 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -103,11 +103,11 @@ static inline void __rtrs_put_permit(struct rtrs_clt *clt,
* up earlier.
*
* Context:
- * Can sleep if @wait == RTRS_TAG_WAIT
+ * Can sleep if @wait == RTRS_PERMIT_WAIT
*/
struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *clt,
enum rtrs_clt_con_type con_type,
- int can_wait)
+ enum wait_type can_wait)
{
struct rtrs_permit *permit;
DEFINE_WAIT(wait);
@@ -174,7 +174,7 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
int id = 0;
if (likely(permit->con_type == RTRS_IO_CON))
- id = (permit->cpu_id % (sess->s.con_num - 1)) + 1;
+ id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1;
return to_clt_con(sess->s.con[id]);
}
@@ -1400,23 +1400,29 @@ static void rtrs_clt_close_work(struct work_struct *work);
static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
const struct rtrs_addr *path,
size_t con_num, u16 max_segments,
- size_t max_segment_size)
+ u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess;
int err = -ENOMEM;
int cpu;
+ size_t total_con;
sess = kzalloc(sizeof(*sess), GFP_KERNEL);
if (!sess)
goto err;
- /* Extra connection for user messages */
- con_num += 1;
-
- sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL);
+ /*
+ * irqmode and poll
+ * +1: Extra connection for user messages
+ */
+ total_con = con_num + nr_poll_queues + 1;
+ sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL);
if (!sess->s.con)
goto err_free_sess;
+ sess->s.con_num = total_con;
+ sess->s.irq_con_num = con_num + 1;
+
sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
if (!sess->stats)
goto err_free_con;
@@ -1435,9 +1441,8 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
memcpy(&sess->s.src_addr, path->src,
rdma_addr_size((struct sockaddr *)path->src));
strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname));
- sess->s.con_num = con_num;
sess->clt = clt;
- sess->max_pages_per_mr = max_segments * max_segment_size >> 12;
+ sess->max_pages_per_mr = max_segments;
init_waitqueue_head(&sess->state_wq);
sess->state = RTRS_CLT_CONNECTING;
atomic_set(&sess->connected_cnt, 0);
@@ -1576,9 +1581,14 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
}
cq_size = max_send_wr + max_recv_wr;
cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
- err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
- cq_vector, cq_size, max_send_wr,
- max_recv_wr, IB_POLL_SOFTIRQ);
+ if (con->c.cid >= sess->s.irq_con_num)
+ err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_DIRECT);
+ else
+ err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_SOFTIRQ);
/*
* In case of error we do not bother to clean previous allocations,
* since destroy_con_cq_qp() must be called.
@@ -2528,7 +2538,6 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
void (*link_ev)(void *priv,
enum rtrs_clt_link_ev ev),
unsigned int max_segments,
- size_t max_segment_size,
unsigned int reconnect_delay_sec,
unsigned int max_reconnect_attempts)
{
@@ -2558,7 +2567,6 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
clt->port = port;
clt->pdu_sz = pdu_sz;
clt->max_segments = max_segments;
- clt->max_segment_size = max_segment_size;
clt->reconnect_delay_sec = reconnect_delay_sec;
clt->max_reconnect_attempts = max_reconnect_attempts;
clt->priv = priv;
@@ -2628,9 +2636,9 @@ static void free_clt(struct rtrs_clt *clt)
* @pdu_sz: Size of extra payload which can be accessed after permit allocation.
* @reconnect_delay_sec: time between reconnect tries
* @max_segments: Max. number of segments per IO request
- * @max_segment_size: Max. size of one segment
* @max_reconnect_attempts: Number of times to reconnect on error before giving
* up, 0 for * disabled, -1 for forever
+ * @nr_poll_queues: number of polling mode connection using IB_POLL_DIRECT flag
*
* Starts session establishment with the rtrs_server. The function can block
* up to ~2000ms before it returns.
@@ -2643,8 +2651,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
size_t paths_num, u16 port,
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
- size_t max_segment_size,
- s16 max_reconnect_attempts)
+ s16 max_reconnect_attempts, u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess, *tmp;
struct rtrs_clt *clt;
@@ -2652,7 +2659,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv,
ops->link_ev,
- max_segments, max_segment_size, reconnect_delay_sec,
+ max_segments, reconnect_delay_sec,
max_reconnect_attempts);
if (IS_ERR(clt)) {
err = PTR_ERR(clt);
@@ -2662,7 +2669,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
struct rtrs_clt_sess *sess;
sess = alloc_sess(clt, &paths[i], nr_cpu_ids,
- max_segments, max_segment_size);
+ max_segments, nr_poll_queues);
if (IS_ERR(sess)) {
err = PTR_ERR(sess);
goto close_all_sess;
@@ -2720,8 +2727,8 @@ void rtrs_clt_close(struct rtrs_clt *clt)
/* Now it is safe to iterate over all paths without locks */
list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) {
- rtrs_clt_destroy_sess_files(sess, NULL);
rtrs_clt_close_conns(sess, true);
+ rtrs_clt_destroy_sess_files(sess, NULL);
kobject_put(&sess->kobj);
}
free_clt(clt);
@@ -2887,6 +2894,31 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
}
EXPORT_SYMBOL(rtrs_clt_request);
+/**
+ * rtrs_clt_rdma_cq_direct() - poll one connection's CQ on the connected paths
+ * @clt: session pointer
+ * @index: zero-based connection selector; slot @index + 1 of each path's
+ *	   connection array is polled
+ *
+ * Walks the paths of @clt under rcu_read_lock(), skipping every path whose
+ * state is not RTRS_CLT_CONNECTED, and calls ib_process_cq_direct() on the
+ * CQ of the selected connection. The walk stops at the first path for which
+ * that call returns a non-zero value.
+ *
+ * Return: the value of the last ib_process_cq_direct() call, or 0 if no
+ * connected path was visited.
+ */
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
+{
+	/*
+	 * Must be initialized: the loop body may never execute (no paths, or
+	 * none in RTRS_CLT_CONNECTED) while cnt is returned unconditionally.
+	 * NOTE(review): 0 reports "nothing processed"; confirm that callers
+	 * do not expect a negative value when no path is usable.
+	 */
+	int cnt = 0;
+	struct rtrs_con *con;
+	struct rtrs_clt_sess *sess;
+	struct path_it it;
+
+	rcu_read_lock();
+	for (path_it_init(&it, clt);
+	     (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
+		/* Only paths that are currently connected are polled. */
+		if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
+			continue;
+
+		con = sess->s.con[index + 1];
+		cnt = ib_process_cq_direct(con->cq, -1);
+		/* Stop at the first path that reports a non-zero result. */
+		if (cnt)
+			break;
+	}
+	path_it_deinit(&it);
+	rcu_read_unlock();
+
+	return cnt;
+}
+EXPORT_SYMBOL(rtrs_clt_rdma_cq_direct);
+
/**
* rtrs_clt_query() - queries RTRS session attributes
*@clt: session pointer
@@ -2915,8 +2947,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
struct rtrs_clt_sess *sess;
int err;
- sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments,
- clt->max_segment_size);
+ sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments, 0);
if (IS_ERR(sess))
return PTR_ERR(sess);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index 692bc83e1f09..98ba5d0a48b8 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -166,7 +166,6 @@ struct rtrs_clt {
unsigned int max_reconnect_attempts;
unsigned int reconnect_delay_sec;
unsigned int max_segments;
- size_t max_segment_size;
void *permits;
unsigned long *permits_map;
size_t queue_depth;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 8caad0a2322b..00eb45053339 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -101,6 +101,7 @@ struct rtrs_sess {
uuid_t uuid;
struct rtrs_con **con;
unsigned int con_num;
+ unsigned int irq_con_num;
unsigned int recon_cnt;
struct rtrs_ib_dev *dev;
int dev_ref;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index d071809e3ed2..f7aa2a7e7442 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -998,7 +998,7 @@ static void process_read(struct rtrs_srv_con *con,
usr_len = le16_to_cpu(msg->usr_len);
data_len = off - usr_len;
data = page_address(srv->chunks[buf_id]);
- ret = ctx->ops.rdma_ev(srv, srv->priv, id, READ, data, data_len,
+ ret = ctx->ops.rdma_ev(srv->priv, id, READ, data, data_len,
data + data_len, usr_len);
if (unlikely(ret)) {
@@ -1051,7 +1051,7 @@ static void process_write(struct rtrs_srv_con *con,
usr_len = le16_to_cpu(req->usr_len);
data_len = off - usr_len;
data = page_address(srv->chunks[buf_id]);
- ret = ctx->ops.rdma_ev(srv, srv->priv, id, WRITE, data, data_len,
+ ret = ctx->ops.rdma_ev(srv->priv, id, WRITE, data, data_len,
data + data_len, usr_len);
if (unlikely(ret)) {
rtrs_err_rl(s,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
index 8738e90e715a..bebaa94c4728 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs.h
@@ -58,14 +58,13 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
size_t path_cnt, u16 port,
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
- size_t max_segment_size,
- s16 max_reconnect_attempts);
+ s16 max_reconnect_attempts, u32 nr_poll_queues);
void rtrs_clt_close(struct rtrs_clt *sess);
-enum {
+enum wait_type {
RTRS_PERMIT_NOWAIT = 0,
- RTRS_PERMIT_WAIT = 1,
+ RTRS_PERMIT_WAIT = 1
};
/**
@@ -81,7 +80,7 @@ enum rtrs_clt_con_type {
struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *sess,
enum rtrs_clt_con_type con_type,
- int wait);
+ enum wait_type wait);
void rtrs_clt_put_permit(struct rtrs_clt *sess, struct rtrs_permit *permit);
@@ -103,6 +102,7 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
struct rtrs_clt *sess, struct rtrs_permit *permit,
const struct kvec *vec, size_t nr, size_t len,
struct scatterlist *sg, unsigned int sg_cnt);
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index);
/**
* rtrs_attrs - RTRS session attributes
@@ -138,7 +138,6 @@ struct rtrs_srv_ops {
* message for the data transfer will be sent to
* the client.
- * @sess: Session
* @priv: Private data set by rtrs_srv_set_sess_priv()
* @id: internal RTRS operation id
* @dir: READ/WRITE
@@ -152,7 +151,7 @@ struct rtrs_srv_ops {
* @usr: The extra user message sent by the client (%vec)
* @usrlen: Size of the user message
*/
- int (*rdma_ev)(struct rtrs_srv *sess, void *priv,
+ int (*rdma_ev)(void *priv,
struct rtrs_srv_op *id, int dir,
void *data, size_t datalen, const void *usr,
size_t usrlen);