Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h            |   6
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c         |  21
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ethtool.c    |  14
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c       |  40
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c  |  45
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c        |  13
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h        | 156
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c    | 323
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c       | 179
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c        | 337
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c         | 118
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.h         |  41
-rw-r--r--  drivers/infiniband/ulp/isert/isert_proto.h      |  47
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c             | 205
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h             |   7
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c           | 443
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.h           |  53
17 files changed, 854 insertions(+), 1194 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 3ede10309754..a6f3eab0f350 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -495,7 +495,6 @@ void ipoib_dev_cleanup(struct net_device *dev);
void ipoib_mcast_join_task(struct work_struct *work);
void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
-void ipoib_mcast_free(struct ipoib_mcast *mc);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
@@ -549,8 +548,9 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
union ib_gid *mgid, int set_qkey);
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
+void ipoib_mcast_remove_list(struct list_head *remove_list);
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+ struct list_head *remove_list);
int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3ae9726efb98..917e46ea3bf6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -70,7 +70,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
static struct ib_send_wr ipoib_cm_rx_drain_wr = {
- .wr_id = IPOIB_CM_RX_DRAIN_WRID,
.opcode = IB_WR_SEND,
};
@@ -223,6 +222,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
* error" WC will be immediately generated for each WR we post.
*/
p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+ ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
ipoib_warn(priv, "failed to post drain wr\n");
@@ -1522,8 +1522,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
int ipoib_cm_dev_init(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- int i, ret;
- struct ib_device_attr attr;
+ int max_srq_sge, i;
INIT_LIST_HEAD(&priv->cm.passive_ids);
INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1540,19 +1539,13 @@ int ipoib_cm_dev_init(struct net_device *dev)
skb_queue_head_init(&priv->cm.skb_queue);
- ret = ib_query_device(priv->ca, &attr);
- if (ret) {
- printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
- return ret;
- }
-
- ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+ ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
- attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
- ipoib_cm_create_srq(dev, attr.max_srq_sge);
+ max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
+ ipoib_cm_create_srq(dev, max_srq_sge);
if (ipoib_cm_has_srq(dev)) {
- priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
- priv->cm.num_frags = attr.max_srq_sge;
+ priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
+ priv->cm.num_frags = max_srq_sge;
ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
priv->cm.max_cm_mtu, priv->cm.num_frags);
} else {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 078cadd6c797..a53fa5fc0dec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -40,15 +40,11 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
struct ipoib_dev_priv *priv = netdev_priv(netdev);
- struct ib_device_attr *attr;
-
- attr = kmalloc(sizeof(*attr), GFP_KERNEL);
- if (attr && !ib_query_device(priv->ca, attr))
- snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%d.%d.%d", (int)(attr->fw_ver >> 32),
- (int)(attr->fw_ver >> 16) & 0xffff,
- (int)attr->fw_ver & 0xffff);
- kfree(attr);
+
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
+ (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
+ (int)priv->ca->attrs.fw_ver & 0xffff);
strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
sizeof(drvinfo->bus_info));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7d3281866ffc..25509bbd4a05 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1150,8 +1150,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
unsigned long flags;
int i;
LIST_HEAD(remove_list);
- struct ipoib_mcast *mcast, *tmcast;
- struct net_device *dev = priv->dev;
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
@@ -1179,18 +1177,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
lockdep_is_held(&priv->lock))) != NULL) {
/* was the neigh idle for two GC periods */
if (time_after(neigh_obsolete, neigh->alive)) {
- u8 *mgid = neigh->daddr + 4;
- /* Is this multicast ? */
- if (*mgid == 0xff) {
- mcast = __ipoib_mcast_find(dev, mgid);
-
- if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
- list_del(&mcast->list);
- rb_erase(&mcast->rb_node, &priv->multicast_tree);
- list_add_tail(&mcast->list, &remove_list);
- }
- }
+ ipoib_check_and_add_mcast_sendonly(priv, neigh->daddr + 4, &remove_list);
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
@@ -1207,10 +1195,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
out_unlock:
spin_unlock_irqrestore(&priv->lock, flags);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
}
static void ipoib_reap_neigh(struct work_struct *work)
@@ -1777,26 +1762,7 @@ int ipoib_add_pkey_attr(struct net_device *dev)
int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
- struct ib_device_attr *device_attr;
- int result = -ENOMEM;
-
- device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
- if (!device_attr) {
- printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
- hca->name, sizeof *device_attr);
- return result;
- }
-
- result = ib_query_device(hca, device_attr);
- if (result) {
- printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
- hca->name, result);
- kfree(device_attr);
- return result;
- }
- priv->hca_caps = device_attr->device_cap_flags;
-
- kfree(device_attr);
+ priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
priv->dev->hw_features = NETIF_F_SG |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index f357ca67a41c..050dfa175d16 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -106,7 +106,7 @@ static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
queue_delayed_work(priv->wq, &priv->mcast_task, 0);
}
-void ipoib_mcast_free(struct ipoib_mcast *mcast)
+static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
return mcast;
}
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
@@ -677,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
return 0;
}
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret = 0;
@@ -704,6 +704,35 @@ int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
return 0;
}
+/*
+ * Check if the multicast group is sendonly. If so remove it from the maps
+ * and add to the remove list
+ */
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+ struct list_head *remove_list)
+{
+ /* Is this multicast ? */
+ if (*mgid == 0xff) {
+ struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid);
+
+ if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+ list_del(&mcast->list);
+ rb_erase(&mcast->rb_node, &priv->multicast_tree);
+ list_add_tail(&mcast->list, remove_list);
+ }
+ }
+}
+
+void ipoib_mcast_remove_list(struct list_head *remove_list)
+{
+ struct ipoib_mcast *mcast, *tmcast;
+
+ list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
+ ipoib_mcast_leave(mcast->dev, mcast);
+ ipoib_mcast_free(mcast);
+ }
+}
+
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -810,10 +839,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
}
static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
@@ -939,10 +965,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(mcast->dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
/*
* Double check that we are still up
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9080161e01af..c827c93f46c5 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -644,7 +644,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
ib_conn = &iser_conn->ib_conn;
if (ib_conn->pi_support) {
- u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+ u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
@@ -656,7 +656,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
* max fastreg page list length.
*/
shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
- ib_conn->device->dev_attr.max_fast_reg_page_list_len);
+ ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
shost->max_sectors = min_t(unsigned int,
1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);
@@ -1059,7 +1059,8 @@ static int __init iser_init(void)
release_wq = alloc_workqueue("release workqueue", 0, 0);
if (!release_wq) {
iser_err("failed to allocate release workqueue\n");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_alloc_wq;
}
iscsi_iser_scsi_transport = iscsi_register_transport(
@@ -1067,12 +1068,14 @@ static int __init iser_init(void)
if (!iscsi_iser_scsi_transport) {
iser_err("iscsi_register_transport failed\n");
err = -EINVAL;
- goto register_transport_failure;
+ goto err_reg;
}
return 0;
-register_transport_failure:
+err_reg:
+ destroy_workqueue(release_wq);
+err_alloc_wq:
kmem_cache_destroy(ig.desc_cache);
return err;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 8a5998e6a407..95f0a64e076b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -48,6 +48,7 @@
#include <scsi/scsi_transport_iscsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
+#include <scsi/iser.h>
#include <linux/interrupt.h>
#include <linux/wait.h>
@@ -151,46 +152,10 @@
- ISER_MAX_RX_MISC_PDUS) / \
(1 + ISER_INFLIGHT_DATAOUTS))
-#define ISER_WC_BATCH_COUNT 16
#define ISER_SIGNAL_CMD_COUNT 32
-#define ISER_VER 0x10
-#define ISER_WSV 0x08
-#define ISER_RSV 0x04
-
-#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
-#define ISER_BEACON_WRID 0xfffffffffffffffeULL
-
-/**
- * struct iser_hdr - iSER header
- *
- * @flags: flags support (zbva, remote_inv)
- * @rsvd: reserved
- * @write_stag: write rkey
- * @write_va: write virtual address
- * @reaf_stag: read rkey
- * @read_va: read virtual address
- */
-struct iser_hdr {
- u8 flags;
- u8 rsvd[3];
- __be32 write_stag;
- __be64 write_va;
- __be32 read_stag;
- __be64 read_va;
-} __attribute__((packed));
-
-
-#define ISER_ZBVA_NOT_SUPPORTED 0x80
-#define ISER_SEND_W_INV_NOT_SUPPORTED 0x40
-
-struct iser_cm_hdr {
- u8 flags;
- u8 rsvd[3];
-} __packed;
-
/* Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
#define ISER_RECV_DATA_SEG_LEN 128
#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
@@ -269,7 +234,7 @@ enum iser_desc_type {
#define ISER_MAX_WRS 7
/**
- * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ * struct iser_tx_desc - iSER TX descriptor
*
* @iser_header: iser header
* @iscsi_header: iscsi header
@@ -287,12 +252,13 @@ enum iser_desc_type {
* @sig_attrs: Signature attributes
*/
struct iser_tx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
enum iser_desc_type type;
u64 dma_addr;
struct ib_sge tx_sg[2];
int num_sge;
+ struct ib_cqe cqe;
bool mapped;
u8 wr_idx;
union iser_wr {
@@ -306,9 +272,10 @@ struct iser_tx_desc {
};
#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
- sizeof(u64) + sizeof(struct ib_sge)))
+ sizeof(u64) + sizeof(struct ib_sge) + \
+ sizeof(struct ib_cqe)))
/**
- * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ * struct iser_rx_desc - iSER RX descriptor
*
* @iser_header: iser header
* @iscsi_header: iscsi header
@@ -318,12 +285,32 @@ struct iser_tx_desc {
* @pad: for sense data TODO: Modify to maximum sense length supported
*/
struct iser_rx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
char data[ISER_RECV_DATA_SEG_LEN];
u64 dma_addr;
struct ib_sge rx_sg;
+ struct ib_cqe cqe;
char pad[ISER_RX_PAD_SIZE];
+} __packed;
+
+/**
+ * struct iser_login_desc - iSER login descriptor
+ *
+ * @req: pointer to login request buffer
+ * @resp: pointer to login response buffer
+ * @req_dma: DMA address of login request buffer
+ * @rsp_dma: DMA address of login response buffer
+ * @sge: IB sge for login post recv
+ * @cqe: completion handler
+ */
+struct iser_login_desc {
+ void *req;
+ void *rsp;
+ u64 req_dma;
+ u64 rsp_dma;
+ struct ib_sge sge;
+ struct ib_cqe cqe;
} __attribute__((packed));
struct iser_conn;
@@ -333,18 +320,12 @@ struct iscsi_iser_task;
/**
* struct iser_comp - iSER completion context
*
- * @device: pointer to device handle
* @cq: completion queue
- * @wcs: work completion array
- * @tasklet: Tasklet handle
* @active_qps: Number of active QPs attached
* to completion context
*/
struct iser_comp {
- struct iser_device *device;
struct ib_cq *cq;
- struct ib_wc wcs[ISER_WC_BATCH_COUNT];
- struct tasklet_struct tasklet;
int active_qps;
};
@@ -380,7 +361,6 @@ struct iser_reg_ops {
*
* @ib_device: RDMA device
* @pd: Protection Domain for this device
- * @dev_attr: Device attributes container
* @mr: Global DMA memory region
* @event_handler: IB events handle routine
* @ig_list: entry in devices list
@@ -389,18 +369,19 @@ struct iser_reg_ops {
* cpus and device max completion vectors
* @comps: Dinamically allocated array of completion handlers
* @reg_ops: Registration ops
+ * @remote_inv_sup: Remote invalidate is supported on this device
*/
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
- struct ib_device_attr dev_attr;
struct ib_mr *mr;
struct ib_event_handler event_handler;
struct list_head ig_list;
int refcount;
int comps_used;
struct iser_comp *comps;
- struct iser_reg_ops *reg_ops;
+ const struct iser_reg_ops *reg_ops;
+ bool remote_inv_sup;
};
#define ISER_CHECK_GUARD 0xc0
@@ -475,10 +456,11 @@ struct iser_fr_pool {
* @rx_wr: receive work request for batch posts
* @device: reference to iser device
* @comp: iser completion context
- * @pi_support: Indicate device T10-PI support
- * @beacon: beacon send wr to signal all flush errors were drained
- * @flush_comp: completes when all connection completions consumed
* @fr_pool: connection fast registration poool
+ * @pi_support: Indicate device T10-PI support
+ * @last: last send wr to signal all flush errors were drained
+ * @last_cqe: cqe handler for last wr
+ * @last_comp: completes when all connection completions consumed
*/
struct ib_conn {
struct rdma_cm_id *cma_id;
@@ -488,10 +470,12 @@ struct ib_conn {
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
struct iser_device *device;
struct iser_comp *comp;
- bool pi_support;
- struct ib_send_wr beacon;
- struct completion flush_comp;
struct iser_fr_pool fr_pool;
+ bool pi_support;
+ struct ib_send_wr last;
+ struct ib_cqe last_cqe;
+ struct ib_cqe reg_cqe;
+ struct completion last_comp;
};
/**
@@ -514,11 +498,7 @@ struct ib_conn {
* @up_completion: connection establishment completed
* (state is ISER_CONN_UP)
* @conn_list: entry in ig conn list
- * @login_buf: login data buffer (stores login parameters)
- * @login_req_buf: login request buffer
- * @login_req_dma: login request buffer dma address
- * @login_resp_buf: login response buffer
- * @login_resp_dma: login response buffer dma address
+ * @login_desc: login descriptor
* @rx_desc_head: head of rx_descs cyclic buffer
* @rx_descs: rx buffers array (cyclic buffer)
* @num_rx_descs: number of rx descriptors
@@ -541,15 +521,13 @@ struct iser_conn {
struct completion ib_completion;
struct completion up_completion;
struct list_head conn_list;
-
- char *login_buf;
- char *login_req_buf, *login_resp_buf;
- u64 login_req_dma, login_resp_dma;
+ struct iser_login_desc login_desc;
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
u32 num_rx_descs;
unsigned short scsi_sg_tablesize;
unsigned int scsi_max_sectors;
+ bool snd_w_inv;
};
/**
@@ -579,9 +557,8 @@ struct iscsi_iser_task {
struct iser_page_vec {
u64 *pages;
- int length;
- int offset;
- int data_size;
+ int npages;
+ struct ib_mr fake_mr;
};
/**
@@ -633,12 +610,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn);
void iser_release_work(struct work_struct *work);
-void iser_rcv_completion(struct iser_rx_desc *desc,
- unsigned long dto_xfer_len,
- struct ib_conn *ib_conn);
-
-void iser_snd_completion(struct iser_tx_desc *desc,
- struct ib_conn *ib_conn);
+void iser_err_comp(struct ib_wc *wc, const char *type);
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_task_rdma_init(struct iscsi_iser_task *task);
@@ -651,7 +630,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir dir);
+ enum iser_data_dir dir,
+ bool all_imm);
void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
enum iser_data_dir dir);
@@ -719,4 +699,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc)
return cur_wr;
}
+static inline struct iser_conn *
+to_iser_conn(struct ib_conn *ib_conn)
+{
+ return container_of(ib_conn, struct iser_conn, ib_conn);
+}
+
+static inline struct iser_rx_desc *
+iser_rx(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_rx_desc, cqe);
+}
+
+static inline struct iser_tx_desc *
+iser_tx(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_tx_desc, cqe);
+}
+
+static inline struct iser_login_desc *
+iser_login(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_login_desc, cqe);
+}
+
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index ffd00c420729..ed54b388e7ad 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -51,7 +51,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_mem_reg *mem_reg;
int err;
- struct iser_hdr *hdr = &iser_task->desc.iser_header;
+ struct iser_ctrl *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
err = iser_dma_map_task_data(iser_task,
@@ -72,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
return err;
}
- err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_mem_reg *mem_reg;
int err;
- struct iser_hdr *hdr = &iser_task->desc.iser_header;
+ struct iser_ctrl *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
@@ -126,7 +126,8 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return err;
}
- err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT,
+ buf_out->data_len == imm_sz);
if (err != 0) {
iser_err("Failed to register write cmd RDMA mem\n");
return err;
@@ -166,7 +167,7 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
ib_dma_sync_single_for_cpu(device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
- memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
+ memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
tx_desc->iser_header.flags = ISER_VER;
tx_desc->num_sge = 1;
}
@@ -174,73 +175,63 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
static void iser_free_login_buf(struct iser_conn *iser_conn)
{
struct iser_device *device = iser_conn->ib_conn.device;
+ struct iser_login_desc *desc = &iser_conn->login_desc;
- if (!iser_conn->login_buf)
+ if (!desc->req)
return;
- if (iser_conn->login_req_dma)
- ib_dma_unmap_single(device->ib_device,
- iser_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+ ib_dma_unmap_single(device->ib_device, desc->req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (iser_conn->login_resp_dma)
- ib_dma_unmap_single(device->ib_device,
- iser_conn->login_resp_dma,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+ ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(iser_conn->login_buf);
+ kfree(desc->req);
+ kfree(desc->rsp);
/* make sure we never redo any unmapping */
- iser_conn->login_req_dma = 0;
- iser_conn->login_resp_dma = 0;
- iser_conn->login_buf = NULL;
+ desc->req = NULL;
+ desc->rsp = NULL;
}
static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
struct iser_device *device = iser_conn->ib_conn.device;
- int req_err, resp_err;
-
- BUG_ON(device == NULL);
-
- iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
- ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!iser_conn->login_buf)
- goto out_err;
-
- iser_conn->login_req_buf = iser_conn->login_buf;
- iser_conn->login_resp_buf = iser_conn->login_buf +
- ISCSI_DEF_MAX_RECV_SEG_LEN;
-
- iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
- iser_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN,
- DMA_TO_DEVICE);
-
- iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
- iser_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE,
- DMA_FROM_DEVICE);
-
- req_err = ib_dma_mapping_error(device->ib_device,
- iser_conn->login_req_dma);
- resp_err = ib_dma_mapping_error(device->ib_device,
- iser_conn->login_resp_dma);
-
- if (req_err || resp_err) {
- if (req_err)
- iser_conn->login_req_dma = 0;
- if (resp_err)
- iser_conn->login_resp_dma = 0;
- goto free_login_buf;
- }
+ struct iser_login_desc *desc = &iser_conn->login_desc;
+
+ desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
+ if (!desc->req)
+ return -ENOMEM;
+
+ desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device,
+ desc->req_dma))
+ goto free_req;
+
+ desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+ if (!desc->rsp)
+ goto unmap_req;
+
+ desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
+ ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device,
+ desc->rsp_dma))
+ goto free_rsp;
+
return 0;
-free_login_buf:
- iser_free_login_buf(iser_conn);
+free_rsp:
+ kfree(desc->rsp);
+unmap_req:
+ ib_dma_unmap_single(device->ib_device, desc->req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
+free_req:
+ kfree(desc->req);
-out_err:
- iser_err("unable to alloc or map login buf\n");
return -ENOMEM;
}
@@ -280,11 +271,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
goto rx_desc_dma_map_failed;
rx_desc->dma_addr = dma_addr;
-
+ rx_desc->cqe.done = iser_task_rsp;
rx_sg = &rx_desc->rx_sg;
- rx_sg->addr = rx_desc->dma_addr;
+ rx_sg->addr = rx_desc->dma_addr;
rx_sg->length = ISER_RX_PAYLOAD_SIZE;
- rx_sg->lkey = device->pd->local_dma_lkey;
+ rx_sg->lkey = device->pd->local_dma_lkey;
}
iser_conn->rx_desc_head = 0;
@@ -383,6 +374,7 @@ int iser_send_command(struct iscsi_conn *conn,
/* build the tx desc regd header and add it to the tx desc dto */
tx_desc->type = ISCSI_TX_SCSI_COMMAND;
+ tx_desc->cqe.done = iser_cmd_comp;
iser_create_send_desc(iser_conn, tx_desc);
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -464,6 +456,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
}
tx_desc->type = ISCSI_TX_DATAOUT;
+ tx_desc->cqe.done = iser_dataout_comp;
tx_desc->iser_header.flags = ISER_VER;
memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
@@ -513,6 +506,7 @@ int iser_send_control(struct iscsi_conn *conn,
/* build the tx desc regd header and add it to the tx desc dto */
mdesc->type = ISCSI_TX_CONTROL;
+ mdesc->cqe.done = iser_ctrl_comp;
iser_create_send_desc(iser_conn, mdesc);
device = iser_conn->ib_conn.device;
@@ -520,25 +514,25 @@ int iser_send_control(struct iscsi_conn *conn,
data_seg_len = ntoh24(task->hdr->dlength);
if (data_seg_len > 0) {
+ struct iser_login_desc *desc = &iser_conn->login_desc;
struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
+
if (task != conn->login_task) {
iser_err("data present on non login task!!!\n");
goto send_control_error;
}
- ib_dma_sync_single_for_cpu(device->ib_device,
- iser_conn->login_req_dma, task->data_count,
- DMA_TO_DEVICE);
+ ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
+ task->data_count, DMA_TO_DEVICE);
- memcpy(iser_conn->login_req_buf, task->data, task->data_count);
+ memcpy(desc->req, task->data, task->data_count);
- ib_dma_sync_single_for_device(device->ib_device,
- iser_conn->login_req_dma, task->data_count,
- DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
+ task->data_count, DMA_TO_DEVICE);
- tx_dsg->addr = iser_conn->login_req_dma;
- tx_dsg->length = task->data_count;
- tx_dsg->lkey = device->pd->local_dma_lkey;
+ tx_dsg->addr = desc->req_dma;
+ tx_dsg->length = task->data_count;
+ tx_dsg->lkey = device->pd->local_dma_lkey;
mdesc->num_sge = 2;
}
@@ -562,41 +556,126 @@ send_control_error:
return err;
}
-/**
- * iser_rcv_dto_completion - recv DTO completion
- */
-void iser_rcv_completion(struct iser_rx_desc *rx_desc,
- unsigned long rx_xfer_len,
- struct ib_conn *ib_conn)
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
{
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+ struct iser_login_desc *desc = iser_login(wc->wr_cqe);
struct iscsi_hdr *hdr;
- u64 rx_dma;
- int rx_buflen, outstanding, count, err;
+ char *data;
+ int length;
- /* differentiate between login to all other PDUs */
- if ((char *)rx_desc == iser_conn->login_resp_buf) {
- rx_dma = iser_conn->login_resp_dma;
- rx_buflen = ISER_RX_LOGIN_SIZE;
- } else {
- rx_dma = rx_desc->dma_addr;
- rx_buflen = ISER_RX_PAYLOAD_SIZE;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "login_rsp");
+ return;
+ }
+
+ ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+ desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+
+ hdr = desc->rsp + sizeof(struct iser_ctrl);
+ data = desc->rsp + ISER_HEADERS_LEN;
+ length = wc->byte_len - ISER_HEADERS_LEN;
+
+ iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+ hdr->itt, length);
+
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);
+
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+ desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+
+ ib_conn->post_recv_buf_count--;
+}
+
+static inline void
+iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
+{
+ if (likely(rkey == desc->rsc.mr->rkey))
+ desc->rsc.mr_valid = 0;
+ else if (likely(rkey == desc->pi_ctx->sig_mr->rkey))
+ desc->pi_ctx->sig_mr_valid = 0;
+}
+
+static int
+iser_check_remote_inv(struct iser_conn *iser_conn,
+ struct ib_wc *wc,
+ struct iscsi_hdr *hdr)
+{
+ if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+ struct iscsi_task *task;
+ u32 rkey = wc->ex.invalidate_rkey;
+
+ iser_dbg("conn %p: remote invalidation for rkey %#x\n",
+ iser_conn, rkey);
+
+ if (unlikely(!iser_conn->snd_w_inv)) {
+ iser_err("conn %p: unexepected remote invalidation, "
+ "terminating connection\n", iser_conn);
+ return -EPROTO;
+ }
+
+ task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
+ if (likely(task)) {
+ struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_fr_desc *desc;
+
+ if (iser_task->dir[ISER_DIR_IN]) {
+ desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
+ iser_inv_desc(desc, rkey);
+ }
+
+ if (iser_task->dir[ISER_DIR_OUT]) {
+ desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
+ iser_inv_desc(desc, rkey);
+ }
+ } else {
+ iser_err("failed to get task for itt=%d\n", hdr->itt);
+ return -EINVAL;
+ }
}
- ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ return 0;
+}
- hdr = &rx_desc->iscsi_header;
+
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+ struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
+ struct iscsi_hdr *hdr;
+ int length;
+ int outstanding, count, err;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "task_rsp");
+ return;
+ }
+
+ ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+ desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+ DMA_FROM_DEVICE);
+
+ hdr = &desc->iscsi_header;
+ length = wc->byte_len - ISER_HEADERS_LEN;
iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
- hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
+ hdr->itt, length);
+
+ if (iser_check_remote_inv(iser_conn, wc, hdr)) {
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+ return;
+ }
- iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
- rx_xfer_len - ISER_HEADERS_LEN);
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);
- ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+ desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+ DMA_FROM_DEVICE);
/* decrementing conn->post_recv_buf_count only --after-- freeing the *
* task eliminates the need to worry on tasks which are completed in *
@@ -604,9 +683,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
* for the posted rx bufs refcount to become zero handles everything */
ib_conn->post_recv_buf_count--;
- if (rx_dma == iser_conn->login_resp_dma)
- return;
-
outstanding = ib_conn->post_recv_buf_count;
if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
count = min(iser_conn->qp_max_recv_dtos - outstanding,
@@ -617,26 +693,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
}
}
-void iser_snd_completion(struct iser_tx_desc *tx_desc,
- struct ib_conn *ib_conn)
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
{
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ iser_err_comp(wc, "command");
+}
+
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
struct iscsi_task *task;
- struct iser_device *device = ib_conn->device;
- if (tx_desc->type == ISCSI_TX_DATAOUT) {
- ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
- ISER_HEADERS_LEN, DMA_TO_DEVICE);
- kmem_cache_free(ig.desc_cache, tx_desc);
- tx_desc = NULL;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "control");
+ return;
}
- if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
- /* this arithmetic is legal by libiscsi dd_data allocation */
- task = (void *) ((long)(void *)tx_desc -
- sizeof(struct iscsi_task));
- if (task->hdr->itt == RESERVED_ITT)
- iscsi_put_task(task);
- }
+ /* this arithmetic is legal by libiscsi dd_data allocation */
+ task = (void *)desc - sizeof(struct iscsi_task);
+ if (task->hdr->itt == RESERVED_ITT)
+ iscsi_put_task(task);
+}
+
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_device *device = ib_conn->device;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ iser_err_comp(wc, "dataout");
+
+ ib_dma_unmap_single(device->ib_device, desc->dma_addr,
+ ISER_HEADERS_LEN, DMA_TO_DEVICE);
+ kmem_cache_free(ig.desc_cache, desc);
+}
+
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+
+ complete(&ib_conn->last_comp);
}
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index ea765fb9664d..9a391cc5b9b3 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -49,7 +49,7 @@ int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_reg_resources *rsc,
struct iser_mem_reg *mem_reg);
-static struct iser_reg_ops fastreg_ops = {
+static const struct iser_reg_ops fastreg_ops = {
.alloc_reg_res = iser_alloc_fastreg_pool,
.free_reg_res = iser_free_fastreg_pool,
.reg_mem = iser_fast_reg_mr,
@@ -58,7 +58,7 @@ static struct iser_reg_ops fastreg_ops = {
.reg_desc_put = iser_reg_desc_put_fr,
};
-static struct iser_reg_ops fmr_ops = {
+static const struct iser_reg_ops fmr_ops = {
.alloc_reg_res = iser_alloc_fmr_pool,
.free_reg_res = iser_free_fmr_pool,
.reg_mem = iser_fast_reg_fmr,
@@ -67,19 +67,24 @@ static struct iser_reg_ops fmr_ops = {
.reg_desc_put = iser_reg_desc_put_fmr,
};
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ iser_err_comp(wc, "memreg");
+}
+
int iser_assign_reg_ops(struct iser_device *device)
{
- struct ib_device_attr *dev_attr = &device->dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
/* Assign function handles - based on FMR support */
- if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
- device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+ if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
+ ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
iser_info("FMR supported, using FMR for registration\n");
device->reg_ops = &fmr_ops;
- } else
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
iser_info("FastReg supported, using FastReg for registration\n");
device->reg_ops = &fastreg_ops;
+ device->remote_inv_sup = iser_always_reg;
} else {
iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
return -1;
@@ -131,67 +136,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
{
}
-#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
-
-/**
- * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
- * and returns the length of resulting physical address array (may be less than
- * the original due to possible compaction).
- *
- * we build a "page vec" under the assumption that the SG meets the RDMA
- * alignment requirements. Other then the first and last SG elements, all
- * the "internal" elements can be compacted into a list whose elements are
- * dma addresses of physical pages. The code supports also the weird case
- * where --few fragments of the same page-- are present in the SG as
- * consecutive elements. Also, it handles one entry SG.
- */
-
-static int iser_sg_to_page_vec(struct iser_data_buf *data,
- struct ib_device *ibdev, u64 *pages,
- int *offset, int *data_size)
-{
- struct scatterlist *sg, *sgl = data->sg;
- u64 start_addr, end_addr, page, chunk_start = 0;
- unsigned long total_sz = 0;
- unsigned int dma_len;
- int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
-
- /* compute the offset of first element */
- *offset = (u64) sgl[0].offset & ~MASK_4K;
-
- new_chunk = 1;
- cur_page = 0;
- for_each_sg(sgl, sg, data->dma_nents, i) {
- start_addr = ib_sg_dma_address(ibdev, sg);
- if (new_chunk)
- chunk_start = start_addr;
- dma_len = ib_sg_dma_len(ibdev, sg);
- end_addr = start_addr + dma_len;
- total_sz += dma_len;
-
- /* collect page fragments until aligned or end of SG list */
- if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
- new_chunk = 0;
- continue;
- }
- new_chunk = 1;
-
- /* address of the first page in the contiguous chunk;
- masking relevant for the very first SG entry,
- which might be unaligned */
- page = chunk_start & MASK_4K;
- do {
- pages[cur_page++] = page;
- page += SIZE_4K;
- } while (page < end_addr);
- }
-
- *data_size = total_sz;
- iser_dbg("page_vec->data_size:%d cur_page %d\n",
- *data_size, cur_page);
- return cur_page;
-}
-
static void iser_data_buf_dump(struct iser_data_buf *data,
struct ib_device *ibdev)
{
@@ -210,10 +154,10 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
int i;
- iser_err("page vec length %d data size %d\n",
- page_vec->length, page_vec->data_size);
- for (i = 0; i < page_vec->length; i++)
- iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
+ iser_err("page vec npages %d data length %d\n",
+ page_vec->npages, page_vec->fake_mr.length);
+ for (i = 0; i < page_vec->npages; i++)
+ iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
}
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
@@ -251,7 +195,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
struct scatterlist *sg = mem->sg;
reg->sge.lkey = device->pd->local_dma_lkey;
- reg->rkey = device->mr->rkey;
+ /*
+ * FIXME: rework the registration code path to differentiate
+ * rkey/lkey use cases
+ */
+ reg->rkey = device->mr ? device->mr->rkey : 0;
reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
@@ -262,11 +210,16 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
return 0;
}
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
+static int iser_set_page(struct ib_mr *mr, u64 addr)
+{
+ struct iser_page_vec *page_vec =
+ container_of(mr, struct iser_page_vec, fake_mr);
+
+ page_vec->pages[page_vec->npages++] = addr;
+
+ return 0;
+}
+
static
int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
@@ -280,22 +233,19 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
struct ib_pool_fmr *fmr;
int ret, plen;
- plen = iser_sg_to_page_vec(mem, device->ib_device,
- page_vec->pages,
- &page_vec->offset,
- &page_vec->data_size);
- page_vec->length = plen;
- if (plen * SIZE_4K < page_vec->data_size) {
+ page_vec->npages = 0;
+ page_vec->fake_mr.page_size = SIZE_4K;
+ plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
+ mem->size, iser_set_page);
+ if (unlikely(plen < mem->size)) {
iser_err("page vec too short to hold this SG\n");
iser_data_buf_dump(mem, device->ib_device);
iser_dump_page_vec(page_vec);
return -EINVAL;
}
- fmr = ib_fmr_pool_map_phys(fmr_pool,
- page_vec->pages,
- page_vec->length,
- page_vec->pages[0]);
+ fmr = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
+ page_vec->npages, page_vec->pages[0]);
if (IS_ERR(fmr)) {
ret = PTR_ERR(fmr);
iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
@@ -304,8 +254,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
reg->sge.lkey = fmr->fmr->lkey;
reg->rkey = fmr->fmr->rkey;
- reg->sge.addr = page_vec->pages[0] + page_vec->offset;
- reg->sge.length = page_vec->data_size;
+ reg->sge.addr = page_vec->fake_mr.iova;
+ reg->sge.length = page_vec->fake_mr.length;
reg->mem_h = fmr;
iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
@@ -413,19 +363,16 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
*mask |= ISER_CHECK_GUARD;
}
-static void
-iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+static inline void
+iser_inv_rkey(struct ib_send_wr *inv_wr,
+ struct ib_mr *mr,
+ struct ib_cqe *cqe)
{
- u32 rkey;
-
inv_wr->opcode = IB_WR_LOCAL_INV;
- inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+ inv_wr->wr_cqe = cqe;
inv_wr->ex.invalidate_rkey = mr->rkey;
inv_wr->send_flags = 0;
inv_wr->num_sge = 0;
-
- rkey = ib_inc_rkey(mr->rkey);
- ib_update_fast_reg_key(mr, rkey);
}
static int
@@ -437,7 +384,9 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
+ struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_sig_handover_wr *wr;
+ struct ib_mr *mr = pi_ctx->sig_mr;
int ret;
memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -447,17 +396,19 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
- if (!pi_ctx->sig_mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
+ if (pi_ctx->sig_mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+ ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
wr->wr.opcode = IB_WR_REG_SIG_MR;
- wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.wr_cqe = cqe;
wr->wr.sg_list = &data_reg->sge;
wr->wr.num_sge = 1;
wr->wr.send_flags = 0;
wr->sig_attrs = sig_attrs;
- wr->sig_mr = pi_ctx->sig_mr;
+ wr->sig_mr = mr;
if (scsi_prot_sg_count(iser_task->sc))
wr->prot = &prot_reg->sge;
else
@@ -465,10 +416,10 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
wr->access_flags = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
- pi_ctx->sig_mr_valid = 0;
+ pi_ctx->sig_mr_valid = 1;
- sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
- sig_reg->rkey = pi_ctx->sig_mr->rkey;
+ sig_reg->sge.lkey = mr->lkey;
+ sig_reg->rkey = mr->rkey;
sig_reg->sge.addr = 0;
sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
@@ -485,12 +436,15 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_mem_reg *reg)
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
+ struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_mr *mr = rsc->mr;
struct ib_reg_wr *wr;
int n;
- if (!rsc->mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
+ if (rsc->mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+ ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
if (unlikely(n != mem->size)) {
@@ -501,7 +455,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
wr = reg_wr(iser_tx_next_wr(tx_desc));
wr->wr.opcode = IB_WR_REG_MR;
- wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.wr_cqe = cqe;
wr->wr.send_flags = 0;
wr->wr.num_sge = 0;
wr->mr = mr;
@@ -510,7 +464,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ;
- rsc->mr_valid = 0;
+ rsc->mr_valid = 1;
reg->sge.lkey = mr->lkey;
reg->rkey = mr->rkey;
@@ -554,7 +508,8 @@ iser_reg_data_sg(struct iscsi_iser_task *task,
}
int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir dir)
+ enum iser_data_dir dir,
+ bool all_imm)
{
struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
@@ -565,8 +520,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
bool use_dma_key;
int err;
- use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
- scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+ use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
+ scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
if (!use_dma_key) {
desc = device->reg_ops->reg_desc_get(ib_conn);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 42f4da620f2e..40c0f4978e2f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -44,17 +44,6 @@
#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
ISCSI_ISER_MAX_CONN)
-static int iser_cq_poll_limit = 512;
-
-static void iser_cq_tasklet_fn(unsigned long data);
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
-
-static void iser_cq_event_callback(struct ib_event *cause, void *context)
-{
- iser_err("cq event %s (%d)\n",
- ib_event_msg(cause->event), cause->event);
-}
-
static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
iser_err("qp event %s (%d)\n",
@@ -78,59 +67,40 @@ static void iser_event_handler(struct ib_event_handler *handler,
*/
static int iser_create_device_ib_res(struct iser_device *device)
{
- struct ib_device_attr *dev_attr = &device->dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
int ret, i, max_cqe;
- ret = ib_query_device(device->ib_device, dev_attr);
- if (ret) {
- pr_warn("Query device failed for %s\n", device->ib_device->name);
- return ret;
- }
-
ret = iser_assign_reg_ops(device);
if (ret)
return ret;
device->comps_used = min_t(int, num_online_cpus(),
- device->ib_device->num_comp_vectors);
+ ib_dev->num_comp_vectors);
device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
GFP_KERNEL);
if (!device->comps)
goto comps_err;
- max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+ max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
- device->comps_used, device->ib_device->name,
- device->ib_device->num_comp_vectors, max_cqe);
+ device->comps_used, ib_dev->name,
+ ib_dev->num_comp_vectors, max_cqe);
- device->pd = ib_alloc_pd(device->ib_device);
+ device->pd = ib_alloc_pd(ib_dev);
if (IS_ERR(device->pd))
goto pd_err;
for (i = 0; i < device->comps_used; i++) {
- struct ib_cq_init_attr cq_attr = {};
struct iser_comp *comp = &device->comps[i];
- comp->device = device;
- cq_attr.cqe = max_cqe;
- cq_attr.comp_vector = i;
- comp->cq = ib_create_cq(device->ib_device,
- iser_cq_callback,
- iser_cq_event_callback,
- (void *)comp,
- &cq_attr);
+ comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
+ IB_POLL_SOFTIRQ);
if (IS_ERR(comp->cq)) {
comp->cq = NULL;
goto cq_err;
}
-
- if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
- goto cq_err;
-
- tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
- (unsigned long)comp);
}
if (!iser_always_reg) {
@@ -140,11 +110,11 @@ static int iser_create_device_ib_res(struct iser_device *device)
device->mr = ib_get_dma_mr(device->pd, access);
if (IS_ERR(device->mr))
- goto dma_mr_err;
+ goto cq_err;
}
- INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
- iser_event_handler);
+ INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
+ iser_event_handler);
if (ib_register_event_handler(&device->event_handler))
goto handler_err;
@@ -153,15 +123,12 @@ static int iser_create_device_ib_res(struct iser_device *device)
handler_err:
if (device->mr)
ib_dereg_mr(device->mr);
-dma_mr_err:
- for (i = 0; i < device->comps_used; i++)
- tasklet_kill(&device->comps[i].tasklet);
cq_err:
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
if (comp->cq)
- ib_destroy_cq(comp->cq);
+ ib_free_cq(comp->cq);
}
ib_dealloc_pd(device->pd);
pd_err:
@@ -182,8 +149,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
- tasklet_kill(&comp->tasklet);
- ib_destroy_cq(comp->cq);
+ ib_free_cq(comp->cq);
comp->cq = NULL;
}
@@ -299,7 +265,7 @@ iser_alloc_reg_res(struct ib_device *ib_device,
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
return ret;
}
- res->mr_valid = 1;
+ res->mr_valid = 0;
return 0;
}
@@ -336,7 +302,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device,
ret = PTR_ERR(pi_ctx->sig_mr);
goto sig_mr_failure;
}
- pi_ctx->sig_mr_valid = 1;
+ pi_ctx->sig_mr_valid = 0;
desc->pi_ctx->sig_protected = 0;
return 0;
@@ -461,10 +427,9 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
*/
static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
struct iser_device *device;
- struct ib_device_attr *dev_attr;
+ struct ib_device *ib_dev;
struct ib_qp_init_attr init_attr;
int ret = -ENOMEM;
int index, min_index = 0;
@@ -472,7 +437,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
BUG_ON(ib_conn->device == NULL);
device = ib_conn->device;
- dev_attr = &device->dev_attr;
+ ib_dev = device->ib_device;
memset(&init_attr, 0, sizeof init_attr);
@@ -503,16 +468,16 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
} else {
- if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+ if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
} else {
- init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+ init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
iser_conn->max_cmds =
- ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+ ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
iser_dbg("device %s supports max_send_wr %d\n",
- device->ib_device->name, dev_attr->max_qp_wr);
+ device->ib_device->name, ib_dev->attrs.max_qp_wr);
}
}
@@ -724,13 +689,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
iser_conn, err);
/* post an indication that all flush errors were consumed */
- err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+ err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
if (err) {
- iser_err("conn %p failed to post beacon", ib_conn);
+ iser_err("conn %p failed to post last wr", ib_conn);
return 1;
}
- wait_for_completion(&ib_conn->flush_comp);
+ wait_for_completion(&ib_conn->last_comp);
}
return 1;
@@ -756,7 +721,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
- device->dev_attr.max_fast_reg_page_list_len);
+ device->ib_device->attrs.max_fast_reg_page_list_len);
if (sg_tablesize > sup_sg_tablesize) {
sg_tablesize = sup_sg_tablesize;
@@ -799,7 +764,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
/* connection T10-PI support */
if (iser_pi_enable) {
- if (!(device->dev_attr.device_cap_flags &
+ if (!(device->ib_device->attrs.device_cap_flags &
IB_DEVICE_SIGNATURE_HANDOVER)) {
iser_warn("T10-PI requested but not supported on %s, "
"continue without T10-PI\n",
@@ -841,16 +806,17 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
goto failure;
memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
+ conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom;
conn_param.initiator_depth = 1;
conn_param.retry_count = 7;
conn_param.rnr_retry_count = 6;
memset(&req_hdr, 0, sizeof(req_hdr));
- req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
- ISER_SEND_W_INV_NOT_SUPPORTED);
- conn_param.private_data = (void *)&req_hdr;
- conn_param.private_data_len = sizeof(struct iser_cm_hdr);
+ req_hdr.flags = ISER_ZBVA_NOT_SUP;
+ if (!device->remote_inv_sup)
+ req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP;
+ conn_param.private_data = (void *)&req_hdr;
+ conn_param.private_data_len = sizeof(struct iser_cm_hdr);
ret = rdma_connect(cma_id, &conn_param);
if (ret) {
@@ -863,7 +829,8 @@ failure:
iser_connect_error(cma_id);
}
-static void iser_connected_handler(struct rdma_cm_id *cma_id)
+static void iser_connected_handler(struct rdma_cm_id *cma_id,
+ const void *private_data)
{
struct iser_conn *iser_conn;
struct ib_qp_attr attr;
@@ -877,6 +844,15 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
+ if (private_data) {
+ u8 flags = *(u8 *)private_data;
+
+ iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
+ }
+
+ iser_info("conn %p: negotiated %s invalidation\n",
+ iser_conn, iser_conn->snd_w_inv ? "remote" : "local");
+
iser_conn->state = ISER_CONN_UP;
complete(&iser_conn->up_completion);
}
@@ -928,7 +904,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
iser_route_handler(cma_id);
break;
case RDMA_CM_EVENT_ESTABLISHED:
- iser_connected_handler(cma_id);
+ iser_connected_handler(cma_id, event->param.conn.private_data);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -967,14 +943,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
void iser_conn_init(struct iser_conn *iser_conn)
{
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+
iser_conn->state = ISER_CONN_INIT;
- iser_conn->ib_conn.post_recv_buf_count = 0;
- init_completion(&iser_conn->ib_conn.flush_comp);
init_completion(&iser_conn->stop_completion);
init_completion(&iser_conn->ib_completion);
init_completion(&iser_conn->up_completion);
INIT_LIST_HEAD(&iser_conn->conn_list);
mutex_init(&iser_conn->state_mutex);
+
+ ib_conn->post_recv_buf_count = 0;
+ ib_conn->reg_cqe.done = iser_reg_comp;
+ ib_conn->last_cqe.done = iser_last_comp;
+ ib_conn->last.wr_cqe = &ib_conn->last_cqe;
+ ib_conn->last.opcode = IB_WR_SEND;
+ init_completion(&ib_conn->last_comp);
}
/**
@@ -1000,9 +983,6 @@ int iser_connect(struct iser_conn *iser_conn,
iser_conn->state = ISER_CONN_PENDING;
- ib_conn->beacon.wr_id = ISER_BEACON_WRID;
- ib_conn->beacon.opcode = IB_WR_SEND;
-
ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
(void *)iser_conn,
RDMA_PS_TCP, IB_QPT_RC);
@@ -1045,56 +1025,60 @@ connect_failure:
int iser_post_recvl(struct iser_conn *iser_conn)
{
- struct ib_recv_wr rx_wr, *rx_wr_failed;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
- struct ib_sge sge;
+ struct iser_login_desc *desc = &iser_conn->login_desc;
+ struct ib_recv_wr wr, *wr_failed;
int ib_ret;
- sge.addr = iser_conn->login_resp_dma;
- sge.length = ISER_RX_LOGIN_SIZE;
- sge.lkey = ib_conn->device->pd->local_dma_lkey;
+ desc->sge.addr = desc->rsp_dma;
+ desc->sge.length = ISER_RX_LOGIN_SIZE;
+ desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;
- rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
- rx_wr.sg_list = &sge;
- rx_wr.num_sge = 1;
- rx_wr.next = NULL;
+ desc->cqe.done = iser_login_rsp;
+ wr.wr_cqe = &desc->cqe;
+ wr.sg_list = &desc->sge;
+ wr.num_sge = 1;
+ wr.next = NULL;
ib_conn->post_recv_buf_count++;
- ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
+ ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
if (ib_ret) {
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count--;
}
+
return ib_ret;
}
int iser_post_recvm(struct iser_conn *iser_conn, int count)
{
- struct ib_recv_wr *rx_wr, *rx_wr_failed;
- int i, ib_ret;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
unsigned int my_rx_head = iser_conn->rx_desc_head;
struct iser_rx_desc *rx_desc;
+ struct ib_recv_wr *wr, *wr_failed;
+ int i, ib_ret;
- for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
- rx_desc = &iser_conn->rx_descs[my_rx_head];
- rx_wr->wr_id = (uintptr_t)rx_desc;
- rx_wr->sg_list = &rx_desc->rx_sg;
- rx_wr->num_sge = 1;
- rx_wr->next = rx_wr + 1;
+ for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
+ rx_desc = &iser_conn->rx_descs[my_rx_head];
+ rx_desc->cqe.done = iser_task_rsp;
+ wr->wr_cqe = &rx_desc->cqe;
+ wr->sg_list = &rx_desc->rx_sg;
+ wr->num_sge = 1;
+ wr->next = wr + 1;
my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
}
- rx_wr--;
- rx_wr->next = NULL; /* mark end of work requests list */
+ wr--;
+ wr->next = NULL; /* mark end of work requests list */
ib_conn->post_recv_buf_count += count;
- ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+ ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
if (ib_ret) {
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count -= count;
} else
iser_conn->rx_desc_head = my_rx_head;
+
return ib_ret;
}
@@ -1115,7 +1099,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
DMA_TO_DEVICE);
wr->next = NULL;
- wr->wr_id = (uintptr_t)tx_desc;
+ wr->wr_cqe = &tx_desc->cqe;
wr->sg_list = tx_desc->tx_sg;
wr->num_sge = tx_desc->num_sge;
wr->opcode = IB_WR_SEND;
@@ -1129,149 +1113,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
return ib_ret;
}
-/**
- * is_iser_tx_desc - Indicate if the completion wr_id
- * is a TX descriptor or not.
- * @iser_conn: iser connection
- * @wr_id: completion WR identifier
- *
- * Since we cannot rely on wc opcode in FLUSH errors
- * we must work around it by checking if the wr_id address
- * falls in the iser connection rx_descs buffer. If so
- * it is an RX descriptor, otherwize it is a TX.
- */
-static inline bool
-is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
-{
- void *start = iser_conn->rx_descs;
- int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
-
- if (wr_id >= start && wr_id < start + len)
- return false;
-
- return true;
-}
-
-/**
- * iser_handle_comp_error() - Handle error completion
- * @ib_conn: connection RDMA resources
- * @wc: work completion
- *
- * Notes: We may handle a FLUSH error completion and in this case
- * we only cleanup in case TX type was DATAOUT. For non-FLUSH
- * error completion we should also notify iscsi layer that
- * connection is failed (in case we passed bind stage).
- */
-static void
-iser_handle_comp_error(struct ib_conn *ib_conn,
- struct ib_wc *wc)
-{
- void *wr_id = (void *)(uintptr_t)wc->wr_id;
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
-
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- if (iser_conn->iscsi_conn)
- iscsi_conn_failure(iser_conn->iscsi_conn,
- ISCSI_ERR_CONN_FAILED);
-
- if (wc->wr_id == ISER_FASTREG_LI_WRID)
- return;
-
- if (is_iser_tx_desc(iser_conn, wr_id)) {
- struct iser_tx_desc *desc = wr_id;
-
- if (desc->type == ISCSI_TX_DATAOUT)
- kmem_cache_free(ig.desc_cache, desc);
- } else {
- ib_conn->post_recv_buf_count--;
- }
-}
-
-/**
- * iser_handle_wc - handle a single work completion
- * @wc: work completion
- *
- * Soft-IRQ context, work completion can be either
- * SEND or RECV, and can turn out successful or
- * with error (or flush error).
- */
-static void iser_handle_wc(struct ib_wc *wc)
-{
- struct ib_conn *ib_conn;
- struct iser_tx_desc *tx_desc;
- struct iser_rx_desc *rx_desc;
-
- ib_conn = wc->qp->qp_context;
- if (likely(wc->status == IB_WC_SUCCESS)) {
- if (wc->opcode == IB_WC_RECV) {
- rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
- iser_rcv_completion(rx_desc, wc->byte_len,
- ib_conn);
- } else
- if (wc->opcode == IB_WC_SEND) {
- tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
- iser_snd_completion(tx_desc, ib_conn);
- } else {
- iser_err("Unknown wc opcode %d\n", wc->opcode);
- }
- } else {
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- iser_err("%s (%d): wr id %llx vend_err %x\n",
- ib_wc_status_msg(wc->status), wc->status,
- wc->wr_id, wc->vendor_err);
- else
- iser_dbg("%s (%d): wr id %llx\n",
- ib_wc_status_msg(wc->status), wc->status,
- wc->wr_id);
-
- if (wc->wr_id == ISER_BEACON_WRID)
- /* all flush errors were consumed */
- complete(&ib_conn->flush_comp);
- else
- iser_handle_comp_error(ib_conn, wc);
- }
-}
-
-/**
- * iser_cq_tasklet_fn - iSER completion polling loop
- * @data: iSER completion context
- *
- * Soft-IRQ context, polling connection CQ until
- * either CQ was empty or we exausted polling budget
- */
-static void iser_cq_tasklet_fn(unsigned long data)
-{
- struct iser_comp *comp = (struct iser_comp *)data;
- struct ib_cq *cq = comp->cq;
- struct ib_wc *const wcs = comp->wcs;
- int i, n, completed = 0;
-
- while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
- for (i = 0; i < n; i++)
- iser_handle_wc(&wcs[i]);
-
- completed += n;
- if (completed >= iser_cq_poll_limit)
- break;
- }
-
- /*
- * It is assumed here that arming CQ only once its empty
- * would not cause interrupts to be missed.
- */
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
- iser_dbg("got %d completions\n", completed);
-}
-
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
-{
- struct iser_comp *comp = cq_context;
-
- tasklet_schedule(&comp->tasklet);
-}
-
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector)
{
@@ -1319,3 +1160,21 @@ err:
/* Not alot we can do here, return ambiguous guard error */
return 0x1;
}
+
+void iser_err_comp(struct ib_wc *wc, const char *type)
+{
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);
+
+ iser_err("%s failure: %s (%d) vend_err %x\n", type,
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->vendor_err);
+
+ if (iser_conn->iscsi_conn)
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+ } else {
+ iser_dbg("%s failure: %s (%d)\n", type,
+ ib_wc_status_msg(wc->status), wc->status);
+ }
+}
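
The iser_verbs.c hunks above replace the wr_id cookie scheme and the tasklet polling loop with the newer CQ API: every posted work request carries an embedded struct ib_cqe whose ->done callback the IB core invokes directly, so error handling no longer has to guess whether a flushed wr_id was an RX or a TX descriptor. A minimal sketch of that pattern, using hypothetical names (my_desc, my_send_done, my_post_send) rather than the driver's own:

#include <rdma/ib_verbs.h>

struct my_desc {
	struct ib_cqe	cqe;	/* embedded completion entry */
	void		*buf;
};

/* invoked directly by the IB core for every completion on this CQE */
static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_desc *desc = container_of(wc->wr_cqe, struct my_desc, cqe);

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		/* wc->status still distinguishes flush from real errors */
		return;
	}
	/* ... process desc->buf ... */
}

static int my_post_send(struct ib_qp *qp, struct my_desc *desc,
			struct ib_sge *sge)
{
	struct ib_send_wr wr = {}, *bad_wr;

	desc->cqe.done = my_send_done;
	wr.wr_cqe      = &desc->cqe;	/* replaces the old wr.wr_id cookie */
	wr.sg_list     = sge;
	wr.num_sge     = 1;
	wr.opcode      = IB_WR_SEND;
	wr.send_flags  = IB_SEND_SIGNALED;

	return ib_post_send(qp, &wr, &bad_wr);
}

The same container_of() recovery is what the iser_login_rsp, iser_task_rsp and send/last completion handlers referenced above rely on.
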
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 468c5e132563..f121e6129339 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -29,7 +29,6 @@
#include <target/iscsi/iscsi_transport.h>
#include <linux/semaphore.h>
-#include "isert_proto.h"
#include "ib_isert.h"
#define ISERT_MAX_CONN 8
@@ -95,22 +94,6 @@ isert_qp_event_callback(struct ib_event *e, void *context)
}
}
-static int
-isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
-{
- int ret;
-
- ret = ib_query_device(ib_dev, devattr);
- if (ret) {
- isert_err("ib_query_device() failed: %d\n", ret);
- return ret;
- }
- isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
- isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
-
- return 0;
-}
-
static struct isert_comp *
isert_comp_get(struct isert_conn *isert_conn)
{
@@ -157,9 +140,9 @@ isert_create_qp(struct isert_conn *isert_conn,
attr.recv_cq = comp->cq;
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
- attr.cap.max_send_sge = device->dev_attr.max_sge;
- isert_conn->max_sge = min(device->dev_attr.max_sge,
- device->dev_attr.max_sge_rd);
+ attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+ isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
+ device->ib_device->attrs.max_sge_rd);
attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC;
@@ -287,8 +270,7 @@ isert_free_comps(struct isert_device *device)
}
static int
-isert_alloc_comps(struct isert_device *device,
- struct ib_device_attr *attr)
+isert_alloc_comps(struct isert_device *device)
{
int i, max_cqe, ret = 0;
@@ -308,7 +290,7 @@ isert_alloc_comps(struct isert_device *device,
return -ENOMEM;
}
- max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+ max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
for (i = 0; i < device->comps_used; i++) {
struct ib_cq_init_attr cq_attr = {};
@@ -344,17 +326,15 @@ out_cq:
static int
isert_create_device_ib_res(struct isert_device *device)
{
- struct ib_device_attr *dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
int ret;
- dev_attr = &device->dev_attr;
- ret = isert_query_device(device->ib_device, dev_attr);
- if (ret)
- goto out;
+ isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+ isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
/* asign function handlers */
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
- dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+ if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+ ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
device->use_fastreg = 1;
device->reg_rdma_mem = isert_reg_rdma;
device->unreg_rdma_mem = isert_unreg_rdma;
@@ -364,11 +344,11 @@ isert_create_device_ib_res(struct isert_device *device)
device->unreg_rdma_mem = isert_unmap_cmd;
}
- ret = isert_alloc_comps(device, dev_attr);
+ ret = isert_alloc_comps(device);
if (ret)
goto out;
- device->pd = ib_alloc_pd(device->ib_device);
+ device->pd = ib_alloc_pd(ib_dev);
if (IS_ERR(device->pd)) {
ret = PTR_ERR(device->pd);
isert_err("failed to allocate pd, device %p, ret=%d\n",
@@ -377,7 +357,7 @@ isert_create_device_ib_res(struct isert_device *device)
}
/* Check signature cap */
- device->pi_capable = dev_attr->device_cap_flags &
+ device->pi_capable = ib_dev->attrs.device_cap_flags &
IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
return 0;
@@ -676,6 +656,32 @@ out_login_buf:
return ret;
}
+static void
+isert_set_nego_params(struct isert_conn *isert_conn,
+ struct rdma_conn_param *param)
+{
+ struct ib_device_attr *attr = &isert_conn->device->ib_device->attrs;
+
+ /* Set max inflight RDMA READ requests */
+ isert_conn->initiator_depth = min_t(u8, param->initiator_depth,
+ attr->max_qp_init_rd_atom);
+ isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+
+ if (param->private_data) {
+ u8 flags = *(u8 *)param->private_data;
+
+ /*
+		 * use remote invalidation if both the initiator
+		 * and the HCA support it
+ */
+ isert_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP) &&
+ (attr->device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ if (isert_conn->snd_w_inv)
+ isert_info("Using remote invalidation\n");
+ }
+}
+
static int
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
@@ -714,11 +720,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
}
isert_conn->device = device;
- /* Set max inflight RDMA READ requests */
- isert_conn->initiator_depth = min_t(u8,
- event->param.conn.initiator_depth,
- device->dev_attr.max_qp_init_rd_atom);
- isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+ isert_set_nego_params(isert_conn, &event->param.conn);
ret = isert_conn_setup_qp(isert_conn, cma_id);
if (ret)
@@ -1050,8 +1052,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
- memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
- tx_desc->iser_header.flags = ISER_VER;
+ memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
+ tx_desc->iser_header.flags = ISCSI_CTRL;
tx_desc->num_sge = 1;
tx_desc->isert_cmd = isert_cmd;
@@ -1097,7 +1099,14 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
- send_wr->opcode = IB_WR_SEND;
+
+ if (isert_conn->snd_w_inv && isert_cmd->inv_rkey) {
+ send_wr->opcode = IB_WR_SEND_WITH_INV;
+ send_wr->ex.invalidate_rkey = isert_cmd->inv_rkey;
+ } else {
+ send_wr->opcode = IB_WR_SEND;
+ }
+
send_wr->sg_list = &tx_desc->tx_sg[0];
send_wr->num_sge = isert_cmd->tx_desc.num_sge;
send_wr->send_flags = IB_SEND_SIGNALED;
@@ -1486,6 +1495,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
isert_cmd->read_va = read_va;
isert_cmd->write_stag = write_stag;
isert_cmd->write_va = write_va;
+ isert_cmd->inv_rkey = read_stag ? read_stag : write_stag;
ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
rx_desc, (unsigned char *)hdr);
@@ -1543,21 +1553,21 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
static void
isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
{
- struct iser_hdr *iser_hdr = &rx_desc->iser_header;
+ struct iser_ctrl *iser_ctrl = &rx_desc->iser_header;
uint64_t read_va = 0, write_va = 0;
uint32_t read_stag = 0, write_stag = 0;
- switch (iser_hdr->flags & 0xF0) {
+ switch (iser_ctrl->flags & 0xF0) {
case ISCSI_CTRL:
- if (iser_hdr->flags & ISER_RSV) {
- read_stag = be32_to_cpu(iser_hdr->read_stag);
- read_va = be64_to_cpu(iser_hdr->read_va);
+ if (iser_ctrl->flags & ISER_RSV) {
+ read_stag = be32_to_cpu(iser_ctrl->read_stag);
+ read_va = be64_to_cpu(iser_ctrl->read_va);
isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
read_stag, (unsigned long long)read_va);
}
- if (iser_hdr->flags & ISER_WSV) {
- write_stag = be32_to_cpu(iser_hdr->write_stag);
- write_va = be64_to_cpu(iser_hdr->write_va);
+ if (iser_ctrl->flags & ISER_WSV) {
+ write_stag = be32_to_cpu(iser_ctrl->write_stag);
+ write_va = be64_to_cpu(iser_ctrl->write_va);
isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
write_stag, (unsigned long long)write_va);
}
@@ -1568,7 +1578,7 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
isert_err("iSER Hello message\n");
break;
default:
- isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+ isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_ctrl->flags);
break;
}
@@ -3095,12 +3105,20 @@ isert_rdma_accept(struct isert_conn *isert_conn)
struct rdma_cm_id *cm_id = isert_conn->cm_id;
struct rdma_conn_param cp;
int ret;
+ struct iser_cm_hdr rsp_hdr;
memset(&cp, 0, sizeof(struct rdma_conn_param));
cp.initiator_depth = isert_conn->initiator_depth;
cp.retry_count = 7;
cp.rnr_retry_count = 7;
+ memset(&rsp_hdr, 0, sizeof(rsp_hdr));
+ rsp_hdr.flags = ISERT_ZBVA_NOT_USED;
+ if (!isert_conn->snd_w_inv)
+ rsp_hdr.flags = rsp_hdr.flags | ISERT_SEND_W_INV_NOT_USED;
+ cp.private_data = (void *)&rsp_hdr;
+ cp.private_data_len = sizeof(rsp_hdr);
+
ret = rdma_accept(cm_id, &cp);
if (ret) {
isert_err("rdma_accept() failed with: %d\n", ret);
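
These ib_isert.c changes are the target-side half of the negotiation the initiator performs in iser_verbs.c: the initiator advertises ISER_SEND_W_INV_NOT_SUP in the CM private data, the target answers with ISERT_ZBVA_NOT_USED/ISERT_SEND_W_INV_NOT_USED in rdma_accept(), and responses are sent with IB_WR_SEND_WITH_INV only when both sides and the HCA (IB_DEVICE_MEM_MGT_EXTENSIONS) allow it. A condensed sketch of the two decisions, with illustrative helper names (use_remote_inv, set_send_opcode) around the flag names from the diff:

/* on connect request: decide whether remote invalidation may be used */
static bool use_remote_inv(const struct rdma_conn_param *param,
			   const struct ib_device_attr *attr)
{
	u8 flags;

	if (!param->private_data)
		return false;
	flags = *(const u8 *)param->private_data;

	return !(flags & ISER_SEND_W_INV_NOT_SUP) &&
	       (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS);
}

/* when building the response: remotely invalidate the initiator's rkey */
static void set_send_opcode(struct ib_send_wr *send_wr,
			    bool snd_w_inv, u32 inv_rkey)
{
	if (snd_w_inv && inv_rkey) {
		send_wr->opcode = IB_WR_SEND_WITH_INV;
		send_wr->ex.invalidate_rkey = inv_rkey;
	} else {
		send_wr->opcode = IB_WR_SEND;
	}
}
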
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 3d7fbc47c343..8d50453eef66 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -3,6 +3,8 @@
#include <linux/in6.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
+#include <scsi/iser.h>
+
#define DRV_NAME "isert"
#define PFX DRV_NAME ": "
@@ -31,6 +33,38 @@
#define isert_err(fmt, arg...) \
pr_err(PFX "%s: " fmt, __func__ , ## arg)
+/* Constant PDU lengths calculations */
+#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + \
+ sizeof(struct iscsi_hdr))
+#define ISER_RECV_DATA_SEG_LEN 8192
+#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
+
+/* QP settings */
+/* Maximal bounds on received asynchronous PDUs */
+#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
+
+#define ISERT_MAX_RX_MISC_PDUS 6 /*
+ * NOOP_OUT(2), TEXT(1),
+ * SCSI_TMFUNC(2), LOGOUT(1)
+ */
+
+#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
+
+#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+
+#define ISERT_INFLIGHT_DATAOUTS 8
+
+#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
+ (1 + ISERT_INFLIGHT_DATAOUTS) + \
+ ISERT_MAX_TX_MISC_PDUS + \
+ ISERT_MAX_RX_MISC_PDUS)
+
+#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
+ (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
+
#define ISCSI_ISER_SG_TABLESIZE 256
#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
#define ISER_BEACON_WRID 0xfffffffffffffffeULL
@@ -56,7 +90,7 @@ enum iser_conn_state {
};
struct iser_rx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
char data[ISER_RECV_DATA_SEG_LEN];
u64 dma_addr;
@@ -65,7 +99,7 @@ struct iser_rx_desc {
} __packed;
struct iser_tx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
enum isert_desc_type type;
u64 dma_addr;
@@ -129,6 +163,7 @@ struct isert_cmd {
uint32_t write_stag;
uint64_t read_va;
uint64_t write_va;
+ uint32_t inv_rkey;
u64 pdu_buf_dma;
u32 pdu_buf_len;
struct isert_conn *conn;
@@ -176,6 +211,7 @@ struct isert_conn {
struct work_struct release_work;
struct ib_recv_wr beacon;
bool logout_posted;
+ bool snd_w_inv;
};
#define ISERT_MAX_CQ 64
@@ -207,7 +243,6 @@ struct isert_device {
struct isert_comp *comps;
int comps_used;
struct list_head dev_node;
- struct ib_device_attr dev_attr;
int (*reg_rdma_mem)(struct iscsi_conn *conn,
struct iscsi_cmd *cmd,
struct isert_rdma_wr *wr);
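
The QP sizing macros moved into ib_isert.h above budget one response per outstanding command plus up to ISERT_INFLIGHT_DATAOUTS data-outs each, plus the miscellaneous PDUs; with the defaults shown this works out to 128 * (1 + 8) + 4 + 6 = 1162 send WRs per QP. A hypothetical compile-time check, not part of the patch, that states the expectation explicitly:

/* hypothetical sanity check, not in the patch; needs <linux/bug.h> */
#define ISERT_EXPECTED_QP_MAX_REQ_DTOS	1162	/* 128 * 9 + 4 + 6 */

static inline void isert_check_qp_sizing(void)
{
	BUILD_BUG_ON(ISERT_QP_MAX_REQ_DTOS != ISERT_EXPECTED_QP_MAX_REQ_DTOS);
}
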
diff --git a/drivers/infiniband/ulp/isert/isert_proto.h b/drivers/infiniband/ulp/isert/isert_proto.h
deleted file mode 100644
index 4dccd313b777..000000000000
--- a/drivers/infiniband/ulp/isert/isert_proto.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* From iscsi_iser.h */
-
-struct iser_hdr {
- u8 flags;
- u8 rsvd[3];
- __be32 write_stag; /* write rkey */
- __be64 write_va;
- __be32 read_stag; /* read rkey */
- __be64 read_va;
-} __packed;
-
-/*Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
-
-#define ISER_RECV_DATA_SEG_LEN 8192
-#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
-#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
-
-/* QP settings */
-/* Maximal bounds on received asynchronous PDUs */
-#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
-
-#define ISERT_MAX_RX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
- * SCSI_TMFUNC(2), LOGOUT(1) */
-
-#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
-
-#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
-
-#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
-
-#define ISERT_INFLIGHT_DATAOUTS 8
-
-#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
- (1 + ISERT_INFLIGHT_DATAOUTS) + \
- ISERT_MAX_TX_MISC_PDUS + \
- ISERT_MAX_RX_MISC_PDUS)
-
-#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
- (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
-
-#define ISER_VER 0x10
-#define ISER_WSV 0x08
-#define ISER_RSV 0x04
-#define ISCSI_CTRL 0x10
-#define ISER_HELLO 0x20
-#define ISER_HELLORPLY 0x30
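
isert_proto.h can be deleted because both the initiator and the target now take the iSER wire-format definitions from the shared <scsi/iser.h> header. Judging from the definitions removed above and the field names the new code uses, the shared header presumably carries the same control header under the name struct iser_ctrl, plus the one-byte CM negotiation flags; an assumed sketch of that layout:

/* assumed layout of the shared definitions in <scsi/iser.h> */
struct iser_ctrl {
	u8	flags;
	u8	rsvd[3];
	__be32	write_stag;	/* write rkey */
	__be64	write_va;
	__be32	read_stag;	/* read rkey */
	__be64	read_va;
} __packed;

struct iser_cm_hdr {
	u8	flags;
	u8	rsvd[3];
} __packed;
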
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 3db9a659719b..03022f6420d7 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -132,8 +132,9 @@ MODULE_PARM_DESC(ch_count,
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+ const char *opname);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
@@ -445,6 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
dev->max_pages_per_mr);
}
+static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct srp_rdma_ch *ch = cq->cq_context;
+
+ complete(&ch->done);
+}
+
+static struct ib_cqe srp_drain_cqe = {
+ .done = srp_drain_done,
+};
+
/**
* srp_destroy_qp() - destroy an RDMA queue pair
* @ch: SRP RDMA channel.
@@ -457,10 +469,11 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
- static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
+ static struct ib_recv_wr wr = { 0 };
struct ib_recv_wr *bad_wr;
int ret;
+ wr.wr_cqe = &srp_drain_cqe;
/* Destroying a QP and reusing ch->done is only safe if not connected */
WARN_ON_ONCE(ch->connected);
@@ -489,34 +502,27 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
const int m = dev->use_fast_reg ? 3 : 1;
- struct ib_cq_init_attr cq_attr = {};
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
if (!init_attr)
return -ENOMEM;
- /* + 1 for SRP_LAST_WR_ID */
- cq_attr.cqe = target->queue_size + 1;
- cq_attr.comp_vector = ch->comp_vector;
- recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
- &cq_attr);
+ /* queue_size + 1 for ib_drain_qp */
+ recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
+ ch->comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(recv_cq)) {
ret = PTR_ERR(recv_cq);
goto err;
}
- cq_attr.cqe = m * target->queue_size;
- cq_attr.comp_vector = ch->comp_vector;
- send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
- &cq_attr);
+ send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
+ ch->comp_vector, IB_POLL_DIRECT);
if (IS_ERR(send_cq)) {
ret = PTR_ERR(send_cq);
goto err_recv_cq;
}
- ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
-
init_attr->event_handler = srp_qp_event;
init_attr->cap.max_send_wr = m * target->queue_size;
init_attr->cap.max_recv_wr = target->queue_size + 1;
@@ -558,9 +564,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
if (ch->qp)
srp_destroy_qp(ch);
if (ch->recv_cq)
- ib_destroy_cq(ch->recv_cq);
+ ib_free_cq(ch->recv_cq);
if (ch->send_cq)
- ib_destroy_cq(ch->send_cq);
+ ib_free_cq(ch->send_cq);
ch->qp = qp;
ch->recv_cq = recv_cq;
@@ -580,13 +586,13 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
return 0;
err_qp:
- ib_destroy_qp(qp);
+ srp_destroy_qp(ch);
err_send_cq:
- ib_destroy_cq(send_cq);
+ ib_free_cq(send_cq);
err_recv_cq:
- ib_destroy_cq(recv_cq);
+ ib_free_cq(recv_cq);
err:
kfree(init_attr);
@@ -622,9 +628,10 @@ static void srp_free_ch_ib(struct srp_target_port *target,
if (ch->fmr_pool)
ib_destroy_fmr_pool(ch->fmr_pool);
}
+
srp_destroy_qp(ch);
- ib_destroy_cq(ch->send_cq);
- ib_destroy_cq(ch->recv_cq);
+ ib_free_cq(ch->send_cq);
+ ib_free_cq(ch->recv_cq);
/*
* Avoid that the SCSI error handler tries to use this channel after
@@ -1041,18 +1048,25 @@ out:
return ret <= 0 ? ret : -ENODEV;
}
-static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
+static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ srp_handle_qp_err(cq, wc, "INV RKEY");
+}
+
+static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
+ u32 rkey)
{
struct ib_send_wr *bad_wr;
struct ib_send_wr wr = {
.opcode = IB_WR_LOCAL_INV,
- .wr_id = LOCAL_INV_WR_ID_MASK,
.next = NULL,
.num_sge = 0,
.send_flags = 0,
.ex.invalidate_rkey = rkey,
};
+ wr.wr_cqe = &req->reg_cqe;
+ req->reg_cqe.done = srp_inv_rkey_err_done;
return ib_post_send(ch->qp, &wr, &bad_wr);
}
@@ -1074,7 +1088,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
struct srp_fr_desc **pfr;
for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
- res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
+ res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
if (res < 0) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"Queueing INV WR for rkey %#x failed (%d)\n",
@@ -1312,7 +1326,13 @@ reset_state:
return 0;
}
+static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ srp_handle_qp_err(cq, wc, "FAST REG");
+}
+
static int srp_map_finish_fr(struct srp_map_state *state,
+ struct srp_request *req,
struct srp_rdma_ch *ch, int sg_nents)
{
struct srp_target_port *target = ch->target;
@@ -1349,9 +1369,11 @@ static int srp_map_finish_fr(struct srp_map_state *state,
if (unlikely(n < 0))
return n;
+ req->reg_cqe.done = srp_reg_mr_err_done;
+
wr.wr.next = NULL;
wr.wr.opcode = IB_WR_REG_MR;
- wr.wr.wr_id = FAST_REG_WR_ID_MASK;
+ wr.wr.wr_cqe = &req->reg_cqe;
wr.wr.num_sge = 0;
wr.wr.send_flags = 0;
wr.mr = desc->mr;
@@ -1455,7 +1477,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
while (count) {
int i, n;
- n = srp_map_finish_fr(state, ch, count);
+ n = srp_map_finish_fr(state, req, ch, count);
if (unlikely(n < 0))
return n;
@@ -1524,7 +1546,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
#ifdef CONFIG_NEED_SG_DMA_LENGTH
idb_sg->dma_length = idb_sg->length; /* hack^2 */
#endif
- ret = srp_map_finish_fr(&state, ch, 1);
+ ret = srp_map_finish_fr(&state, req, ch, 1);
if (ret < 0)
return ret;
} else if (dev->use_fmr) {
@@ -1719,7 +1741,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
struct srp_iu *iu;
- srp_send_completion(ch->send_cq, ch);
+ ib_process_cq_direct(ch->send_cq, -1);
if (list_empty(&ch->free_tx))
return NULL;
@@ -1739,6 +1761,19 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
return iu;
}
+static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+ struct srp_rdma_ch *ch = cq->cq_context;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ srp_handle_qp_err(cq, wc, "SEND");
+ return;
+ }
+
+ list_add(&iu->list, &ch->free_tx);
+}
+
static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
struct srp_target_port *target = ch->target;
@@ -1749,8 +1784,10 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
list.length = len;
list.lkey = target->lkey;
+ iu->cqe.done = srp_send_done;
+
wr.next = NULL;
- wr.wr_id = (uintptr_t) iu;
+ wr.wr_cqe = &iu->cqe;
wr.sg_list = &list;
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
@@ -1769,8 +1806,10 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
list.length = iu->size;
list.lkey = target->lkey;
+ iu->cqe.done = srp_recv_done;
+
wr.next = NULL;
- wr.wr_id = (uintptr_t) iu;
+ wr.wr_cqe = &iu->cqe;
wr.sg_list = &list;
wr.num_sge = 1;
@@ -1902,14 +1941,20 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch,
"problems processing SRP_AER_REQ\n");
}
-static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+ struct srp_rdma_ch *ch = cq->cq_context;
struct srp_target_port *target = ch->target;
struct ib_device *dev = target->srp_host->srp_dev->dev;
- struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
int res;
u8 opcode;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ srp_handle_qp_err(cq, wc, "RECV");
+ return;
+ }
+
ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
DMA_FROM_DEVICE);
@@ -1972,68 +2017,22 @@ static void srp_tl_err_work(struct work_struct *work)
srp_start_tl_fail_timers(target->rport);
}
-static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
- bool send_err, struct srp_rdma_ch *ch)
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+ const char *opname)
{
+ struct srp_rdma_ch *ch = cq->cq_context;
struct srp_target_port *target = ch->target;
- if (wr_id == SRP_LAST_WR_ID) {
- complete(&ch->done);
- return;
- }
-
if (ch->connected && !target->qp_in_error) {
- if (wr_id & LOCAL_INV_WR_ID_MASK) {
- shost_printk(KERN_ERR, target->scsi_host, PFX
- "LOCAL_INV failed with status %s (%d)\n",
- ib_wc_status_msg(wc_status), wc_status);
- } else if (wr_id & FAST_REG_WR_ID_MASK) {
- shost_printk(KERN_ERR, target->scsi_host, PFX
- "FAST_REG_MR failed status %s (%d)\n",
- ib_wc_status_msg(wc_status), wc_status);
- } else {
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "failed %s status %s (%d) for iu %p\n",
- send_err ? "send" : "receive",
- ib_wc_status_msg(wc_status), wc_status,
- (void *)(uintptr_t)wr_id);
- }
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "failed %s status %s (%d) for CQE %p\n",
+ opname, ib_wc_status_msg(wc->status), wc->status,
+ wc->wr_cqe);
queue_work(system_long_wq, &target->tl_err_work);
}
target->qp_in_error = true;
}
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
-{
- struct srp_rdma_ch *ch = ch_ptr;
- struct ib_wc wc;
-
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- if (likely(wc.status == IB_WC_SUCCESS)) {
- srp_handle_recv(ch, &wc);
- } else {
- srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
- }
- }
-}
-
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
-{
- struct srp_rdma_ch *ch = ch_ptr;
- struct ib_wc wc;
- struct srp_iu *iu;
-
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- if (likely(wc.status == IB_WC_SUCCESS)) {
- iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
- list_add(&iu->list, &ch->free_tx);
- } else {
- srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
- }
- }
-}
-
static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(shost);
@@ -3439,27 +3438,17 @@ free_host:
static void srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
- struct ib_device_attr *dev_attr;
struct srp_host *host;
int mr_page_shift, p;
u64 max_pages_per_mr;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- pr_warn("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
if (!srp_dev)
- goto free_attr;
+ return;
srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
device->map_phys_fmr && device->unmap_fmr);
- srp_dev->has_fr = (dev_attr->device_cap_flags &
+ srp_dev->has_fr = (device->attrs.device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!srp_dev->has_fmr && !srp_dev->has_fr)
dev_warn(&device->dev, "neither FMR nor FR is supported\n");
@@ -3473,23 +3462,23 @@ static void srp_add_one(struct ib_device *device)
* minimum of 4096 bytes. We're unlikely to build large sglists
* out of smaller entries.
*/
- mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
+ mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
srp_dev->mr_page_size = 1 << mr_page_shift;
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
- max_pages_per_mr = dev_attr->max_mr_size;
+ max_pages_per_mr = device->attrs.max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
- dev_attr->max_fast_reg_page_list_len);
+ device->attrs.max_fast_reg_page_list_len);
}
srp_dev->mr_max_size = srp_dev->mr_page_size *
srp_dev->max_pages_per_mr;
- pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
- device->name, mr_page_shift, dev_attr->max_mr_size,
- dev_attr->max_fast_reg_page_list_len,
+ pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+ device->name, mr_page_shift, device->attrs.max_mr_size,
+ device->attrs.max_fast_reg_page_list_len,
srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -3517,17 +3506,13 @@ static void srp_add_one(struct ib_device *device)
}
ib_set_client_data(device, &srp_client, srp_dev);
-
- goto free_attr;
+ return;
err_pd:
ib_dealloc_pd(srp_dev->pd);
free_dev:
kfree(srp_dev);
-
-free_attr:
- kfree(dev_attr);
}
static void srp_remove_one(struct ib_device *device, void *client_data)
@@ -3587,8 +3572,6 @@ static int __init srp_init_module(void)
{
int ret;
- BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
-
if (srp_sg_tablesize) {
pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
if (!cmd_sg_entries)
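
The ib_srp.c conversion uses two of the new CQ polling modes: the receive CQ completes from softirq context (IB_POLL_SOFTIRQ) while the send CQ is polled inline by the driver (IB_POLL_DIRECT), which is why __srp_get_tx_iu() now calls ib_process_cq_direct() where it used to call srp_send_completion(). A reduced sketch of that split, with illustrative helpers (my_alloc_cqs, my_get_tx_iu) built on the fields the diff adds:

/* illustrative helpers, not part of the patch */
static int my_alloc_cqs(struct srp_rdma_ch *ch, struct ib_device *dev,
			int queue_size, int m, int comp_vector)
{
	ch->recv_cq = ib_alloc_cq(dev, ch, queue_size + 1, comp_vector,
				  IB_POLL_SOFTIRQ);
	if (IS_ERR(ch->recv_cq))
		return PTR_ERR(ch->recv_cq);

	ch->send_cq = ib_alloc_cq(dev, ch, m * queue_size, comp_vector,
				  IB_POLL_DIRECT);
	if (IS_ERR(ch->send_cq)) {
		ib_free_cq(ch->recv_cq);
		return PTR_ERR(ch->send_cq);
	}
	return 0;
}

/* reclaim send slots by polling the direct CQ before checking free_tx;
 * the real __srp_get_tx_iu() also removes the entry and honors the
 * task-management reserve */
static struct srp_iu *my_get_tx_iu(struct srp_rdma_ch *ch)
{
	ib_process_cq_direct(ch->send_cq, -1);	/* runs srp_send_done() inline */

	return list_first_entry_or_null(&ch->free_tx, struct srp_iu, list);
}
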
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index f6af531f9f32..9e05ce4a04fd 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -66,11 +66,6 @@ enum {
SRP_TAG_TSK_MGMT = 1U << 31,
SRP_MAX_PAGES_PER_MR = 512,
-
- LOCAL_INV_WR_ID_MASK = 1,
- FAST_REG_WR_ID_MASK = 2,
-
- SRP_LAST_WR_ID = 0xfffffffcU,
};
enum srp_target_state {
@@ -128,6 +123,7 @@ struct srp_request {
struct srp_direct_buf *indirect_desc;
dma_addr_t indirect_dma_addr;
short nmdesc;
+ struct ib_cqe reg_cqe;
};
/**
@@ -231,6 +227,7 @@ struct srp_iu {
void *buf;
size_t size;
enum dma_data_direction direction;
+ struct ib_cqe cqe;
};
/**
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index bc5470c43d26..0c37fee363b1 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid,
static struct ib_client srpt_client;
static void srpt_release_channel(struct srpt_rdma_ch *ch);
static int srpt_queue_status(struct se_cmd *cmd);
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
/**
* opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
@@ -341,10 +343,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
memset(iocp, 0, sizeof *iocp);
strcpy(iocp->id_string, SRPT_ID_STRING);
iocp->guid = cpu_to_be64(srpt_service_guid);
- iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
- iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
- iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
- iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
+ iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
+ iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id);
+ iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver);
+ iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
iocp->subsys_device_id = 0x0;
iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
@@ -453,6 +455,7 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
* srpt_mad_recv_handler() - MAD reception callback function.
*/
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_wc)
{
struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
@@ -778,12 +781,12 @@ static int srpt_post_recv(struct srpt_device *sdev,
struct ib_recv_wr wr, *bad_wr;
BUG_ON(!sdev);
- wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
-
list.addr = ioctx->ioctx.dma;
list.length = srp_max_req_size;
list.lkey = sdev->pd->local_dma_lkey;
+ ioctx->ioctx.cqe.done = srpt_recv_done;
+ wr.wr_cqe = &ioctx->ioctx.cqe;
wr.next = NULL;
wr.sg_list = &list;
wr.num_sge = 1;
@@ -819,8 +822,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
list.length = len;
list.lkey = sdev->pd->local_dma_lkey;
+ ioctx->ioctx.cqe.done = srpt_send_done;
wr.next = NULL;
- wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
+ wr.wr_cqe = &ioctx->ioctx.cqe;
wr.sg_list = &list;
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
@@ -1052,13 +1056,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
BUG_ON(!ch);
BUG_ON(!ioctx);
- BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
+ BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
while (ioctx->n_rdma)
- kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
+ kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
- kfree(ioctx->rdma_ius);
- ioctx->rdma_ius = NULL;
+ kfree(ioctx->rdma_wrs);
+ ioctx->rdma_wrs = NULL;
if (ioctx->mapped_sg_count) {
sg = ioctx->sg;
@@ -1082,7 +1086,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
struct scatterlist *sg, *sg_orig;
int sg_cnt;
enum dma_data_direction dir;
- struct rdma_iu *riu;
+ struct ib_rdma_wr *riu;
struct srp_direct_buf *db;
dma_addr_t dma_addr;
struct ib_sge *sge;
@@ -1109,23 +1113,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
ioctx->mapped_sg_count = count;
- if (ioctx->rdma_ius && ioctx->n_rdma_ius)
- nrdma = ioctx->n_rdma_ius;
+ if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
+ nrdma = ioctx->n_rdma_wrs;
else {
nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
+ ioctx->n_rbuf;
- ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
- if (!ioctx->rdma_ius)
+ ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
+ GFP_KERNEL);
+ if (!ioctx->rdma_wrs)
goto free_mem;
- ioctx->n_rdma_ius = nrdma;
+ ioctx->n_rdma_wrs = nrdma;
}
db = ioctx->rbufs;
tsize = cmd->data_length;
dma_len = ib_sg_dma_len(dev, &sg[0]);
- riu = ioctx->rdma_ius;
+ riu = ioctx->rdma_wrs;
/*
* For each remote desc - calculate the #ib_sge.
@@ -1139,9 +1144,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
rsize = be32_to_cpu(db->len);
raddr = be64_to_cpu(db->va);
- riu->raddr = raddr;
+ riu->remote_addr = raddr;
riu->rkey = be32_to_cpu(db->key);
- riu->sge_cnt = 0;
+ riu->wr.num_sge = 0;
/* calculate how many sge required for this remote_buf */
while (rsize > 0 && tsize > 0) {
@@ -1165,33 +1170,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
rsize = 0;
}
- ++riu->sge_cnt;
+ ++riu->wr.num_sge;
- if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
+ if (rsize > 0 &&
+ riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
++ioctx->n_rdma;
- riu->sge =
- kmalloc(riu->sge_cnt * sizeof *riu->sge,
- GFP_KERNEL);
- if (!riu->sge)
+ riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+ sizeof(*riu->wr.sg_list),
+ GFP_KERNEL);
+ if (!riu->wr.sg_list)
goto free_mem;
++riu;
- riu->sge_cnt = 0;
- riu->raddr = raddr;
+ riu->wr.num_sge = 0;
+ riu->remote_addr = raddr;
riu->rkey = be32_to_cpu(db->key);
}
}
++ioctx->n_rdma;
- riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
- GFP_KERNEL);
- if (!riu->sge)
+ riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+ sizeof(*riu->wr.sg_list),
+ GFP_KERNEL);
+ if (!riu->wr.sg_list)
goto free_mem;
}
db = ioctx->rbufs;
tsize = cmd->data_length;
- riu = ioctx->rdma_ius;
+ riu = ioctx->rdma_wrs;
sg = sg_orig;
dma_len = ib_sg_dma_len(dev, &sg[0]);
dma_addr = ib_sg_dma_address(dev, &sg[0]);
@@ -1200,7 +1207,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
for (i = 0, j = 0;
j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
rsize = be32_to_cpu(db->len);
- sge = riu->sge;
+ sge = riu->wr.sg_list;
k = 0;
while (rsize > 0 && tsize > 0) {
@@ -1232,9 +1239,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
}
++k;
- if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
+ if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
++riu;
- sge = riu->sge;
+ sge = riu->wr.sg_list;
k = 0;
} else if (rsize > 0 && tsize > 0)
++sge;
@@ -1277,8 +1284,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
ioctx->n_rbuf = 0;
ioctx->rbufs = NULL;
ioctx->n_rdma = 0;
- ioctx->n_rdma_ius = 0;
- ioctx->rdma_ius = NULL;
+ ioctx->n_rdma_wrs = 0;
+ ioctx->rdma_wrs = NULL;
ioctx->mapped_sg_count = 0;
init_completion(&ioctx->tx_done);
ioctx->queue_status_only = false;
@@ -1380,118 +1387,44 @@ out:
}
/**
- * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
- */
-static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
-{
- struct srpt_send_ioctx *ioctx;
- enum srpt_command_state state;
- u32 index;
-
- atomic_inc(&ch->sq_wr_avail);
-
- index = idx_from_wr_id(wr_id);
- ioctx = ch->ioctx_ring[index];
- state = srpt_get_cmd_state(ioctx);
-
- WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
- && state != SRPT_STATE_MGMT_RSP_SENT
- && state != SRPT_STATE_NEED_DATA
- && state != SRPT_STATE_DONE);
-
- /* If SRP_RSP sending failed, undo the ch->req_lim change. */
- if (state == SRPT_STATE_CMD_RSP_SENT
- || state == SRPT_STATE_MGMT_RSP_SENT)
- atomic_dec(&ch->req_lim);
-
- srpt_abort_cmd(ioctx);
-}
-
-/**
- * srpt_handle_send_comp() - Process an IB send completion notification.
- */
-static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- enum srpt_command_state state;
-
- atomic_inc(&ch->sq_wr_avail);
-
- state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
-
- if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
- && state != SRPT_STATE_MGMT_RSP_SENT
- && state != SRPT_STATE_DONE))
- pr_debug("state = %d\n", state);
-
- if (state != SRPT_STATE_DONE) {
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
- transport_generic_free_cmd(&ioctx->cmd, 0);
- } else {
- pr_err("IB completion has been received too late for"
- " wr_id = %u.\n", ioctx->ioctx.index);
- }
-}
-
-/**
- * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
- *
* XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
* the data that has been transferred via IB RDMA had to be postponed until the
* check_stop_free() callback. None of this is necessary anymore and needs to
* be cleaned up.
*/
-static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx,
- enum srpt_opcode opcode)
+static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
+
WARN_ON(ioctx->n_rdma <= 0);
atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
- if (opcode == SRPT_RDMA_READ_LAST) {
- if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
- SRPT_STATE_DATA_IN))
- target_execute_cmd(&ioctx->cmd);
- else
- pr_err("%s[%d]: wrong state = %d\n", __func__,
- __LINE__, srpt_get_cmd_state(ioctx));
- } else if (opcode == SRPT_RDMA_ABORT) {
- ioctx->rdma_aborted = true;
- } else {
- WARN(true, "unexpected opcode %d\n", opcode);
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
+ srpt_abort_cmd(ioctx);
+ return;
}
+
+ if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
+ SRPT_STATE_DATA_IN))
+ target_execute_cmd(&ioctx->cmd);
+ else
+ pr_err("%s[%d]: wrong state = %d\n", __func__,
+ __LINE__, srpt_get_cmd_state(ioctx));
}
-/**
- * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
- */
-static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx,
- enum srpt_opcode opcode)
+static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
- enum srpt_command_state state;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
- state = srpt_get_cmd_state(ioctx);
- switch (opcode) {
- case SRPT_RDMA_READ_LAST:
- if (ioctx->n_rdma <= 0) {
- pr_err("Received invalid RDMA read"
- " error completion with idx %d\n",
- ioctx->ioctx.index);
- break;
- }
- atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
- if (state == SRPT_STATE_NEED_DATA)
- srpt_abort_cmd(ioctx);
- else
- pr_err("%s[%d]: wrong state = %d\n",
- __func__, __LINE__, state);
- break;
- case SRPT_RDMA_WRITE_LAST:
- break;
- default:
- pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
- break;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
+ srpt_abort_cmd(ioctx);
}
}
@@ -1926,32 +1859,26 @@ out:
return;
}
-static void srpt_process_rcv_completion(struct ib_cq *cq,
- struct srpt_rdma_ch *ch,
- struct ib_wc *wc)
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_device *sdev = ch->sport->sdev;
- struct srpt_recv_ioctx *ioctx;
- u32 index;
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_recv_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
- index = idx_from_wr_id(wc->wr_id);
if (wc->status == IB_WC_SUCCESS) {
int req_lim;
req_lim = atomic_dec_return(&ch->req_lim);
if (unlikely(req_lim < 0))
pr_err("req_lim = %d < 0\n", req_lim);
- ioctx = sdev->ioctx_ring[index];
srpt_handle_new_iu(ch, ioctx, NULL);
} else {
- pr_info("receiving failed for idx %u with status %d\n",
- index, wc->status);
+ pr_info("receiving failed for ioctx %p with status %d\n",
+ ioctx, wc->status);
}
}
/**
- * srpt_process_send_completion() - Process an IB send completion.
- *
* Note: Although this has not yet been observed during tests, at least in
* theory it is possible that the srpt_get_send_ioctx() call invoked by
* srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1964,109 +1891,52 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
* are queued on cmd_wait_list. The code below processes these delayed
* requests one at a time.
*/
-static void srpt_process_send_completion(struct ib_cq *cq,
- struct srpt_rdma_ch *ch,
- struct ib_wc *wc)
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_send_ioctx *send_ioctx;
- uint32_t index;
- enum srpt_opcode opcode;
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+ enum srpt_command_state state;
- index = idx_from_wr_id(wc->wr_id);
- opcode = opcode_from_wr_id(wc->wr_id);
- send_ioctx = ch->ioctx_ring[index];
- if (wc->status == IB_WC_SUCCESS) {
- if (opcode == SRPT_SEND)
- srpt_handle_send_comp(ch, send_ioctx);
- else {
- WARN_ON(opcode != SRPT_RDMA_ABORT &&
- wc->opcode != IB_WC_RDMA_READ);
- srpt_handle_rdma_comp(ch, send_ioctx, opcode);
- }
+ state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+
+ WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
+ state != SRPT_STATE_MGMT_RSP_SENT);
+
+ atomic_inc(&ch->sq_wr_avail);
+
+ if (wc->status != IB_WC_SUCCESS) {
+ pr_info("sending response for ioctx 0x%p failed"
+ " with status %d\n", ioctx, wc->status);
+
+ atomic_dec(&ch->req_lim);
+ srpt_abort_cmd(ioctx);
+ goto out;
+ }
+
+ if (state != SRPT_STATE_DONE) {
+ srpt_unmap_sg_to_ib_sge(ch, ioctx);
+ transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
- if (opcode == SRPT_SEND) {
- pr_info("sending response for idx %u failed"
- " with status %d\n", index, wc->status);
- srpt_handle_send_err_comp(ch, wc->wr_id);
- } else if (opcode != SRPT_RDMA_MID) {
- pr_info("RDMA t %d for idx %u failed with"
- " status %d\n", opcode, index, wc->status);
- srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
- }
+		pr_err("IB completion has been received too late for"
+		       " ioctx %u\n", ioctx->ioctx.index);

}
- while (unlikely(opcode == SRPT_SEND
- && !list_empty(&ch->cmd_wait_list)
- && srpt_get_ch_state(ch) == CH_LIVE
- && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
+out:
+ while (!list_empty(&ch->cmd_wait_list) &&
+ srpt_get_ch_state(ch) == CH_LIVE &&
+ (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
struct srpt_recv_ioctx *recv_ioctx;
recv_ioctx = list_first_entry(&ch->cmd_wait_list,
struct srpt_recv_ioctx,
wait_list);
list_del(&recv_ioctx->wait_list);
- srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
- }
-}
-
-static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
-{
- struct ib_wc *const wc = ch->wc;
- int i, n;
-
- WARN_ON(cq != ch->cq);
-
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
- for (i = 0; i < n; i++) {
- if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
- srpt_process_rcv_completion(cq, ch, &wc[i]);
- else
- srpt_process_send_completion(cq, ch, &wc[i]);
- }
+ srpt_handle_new_iu(ch, recv_ioctx, ioctx);
}
}
/**
- * srpt_completion() - IB completion queue callback function.
- *
- * Notes:
- * - It is guaranteed that a completion handler will never be invoked
- * concurrently on two different CPUs for the same completion queue. See also
- * Documentation/infiniband/core_locking.txt and the implementation of
- * handle_edge_irq() in kernel/irq/chip.c.
- * - When threaded IRQs are enabled, completion handlers are invoked in thread
- * context instead of interrupt context.
- */
-static void srpt_completion(struct ib_cq *cq, void *ctx)
-{
- struct srpt_rdma_ch *ch = ctx;
-
- wake_up_interruptible(&ch->wait_queue);
-}
-
-static int srpt_compl_thread(void *arg)
-{
- struct srpt_rdma_ch *ch;
-
- /* Hibernation / freezing of the SRPT kernel thread is not supported. */
- current->flags |= PF_NOFREEZE;
-
- ch = arg;
- BUG_ON(!ch);
- pr_info("Session %s: kernel thread %s (PID %d) started\n",
- ch->sess_name, ch->thread->comm, current->pid);
- while (!kthread_should_stop()) {
- wait_event_interruptible(ch->wait_queue,
- (srpt_process_completion(ch->cq, ch),
- kthread_should_stop()));
- }
- pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
- ch->sess_name, ch->thread->comm, current->pid);
- return 0;
-}
-
-/**
* srpt_create_ch_ib() - Create receive and send completion queues.
*/
static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
@@ -2075,7 +1945,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
struct srpt_port *sport = ch->sport;
struct srpt_device *sdev = sport->sdev;
u32 srp_sq_size = sport->port_attrib.srp_sq_size;
- struct ib_cq_init_attr cq_attr = {};
int ret;
WARN_ON(ch->rq_size < 1);
@@ -2086,9 +1955,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
goto out;
retry:
- cq_attr.cqe = ch->rq_size + srp_sq_size;
- ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
- &cq_attr);
+ ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
+ 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
if (IS_ERR(ch->cq)) {
ret = PTR_ERR(ch->cq);
pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -2131,18 +1999,6 @@ retry:
if (ret)
goto err_destroy_qp;
- init_waitqueue_head(&ch->wait_queue);
-
- pr_debug("creating thread for session %s\n", ch->sess_name);
-
- ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
- if (IS_ERR(ch->thread)) {
- pr_err("failed to create kernel thread %ld\n",
- PTR_ERR(ch->thread));
- ch->thread = NULL;
- goto err_destroy_qp;
- }
-
out:
kfree(qp_init);
return ret;
@@ -2150,17 +2006,14 @@ out:
err_destroy_qp:
ib_destroy_qp(ch->qp);
err_destroy_cq:
- ib_destroy_cq(ch->cq);
+ ib_free_cq(ch->cq);
goto out;
}
static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
{
- if (ch->thread)
- kthread_stop(ch->thread);
-
ib_destroy_qp(ch->qp);
- ib_destroy_cq(ch->cq);
+ ib_free_cq(ch->cq);
}
/**
@@ -2808,12 +2661,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *ioctx)
{
- struct ib_rdma_wr wr;
struct ib_send_wr *bad_wr;
- struct rdma_iu *riu;
- int i;
- int ret;
- int sq_wr_avail;
+ int sq_wr_avail, ret, i;
enum dma_data_direction dir;
const int n_rdma = ioctx->n_rdma;
@@ -2829,59 +2678,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
}
}
- ioctx->rdma_aborted = false;
- ret = 0;
- riu = ioctx->rdma_ius;
- memset(&wr, 0, sizeof wr);
-
- for (i = 0; i < n_rdma; ++i, ++riu) {
- if (dir == DMA_FROM_DEVICE) {
- wr.wr.opcode = IB_WR_RDMA_WRITE;
- wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
- SRPT_RDMA_WRITE_LAST :
- SRPT_RDMA_MID,
- ioctx->ioctx.index);
- } else {
- wr.wr.opcode = IB_WR_RDMA_READ;
- wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
- SRPT_RDMA_READ_LAST :
- SRPT_RDMA_MID,
- ioctx->ioctx.index);
- }
- wr.wr.next = NULL;
- wr.remote_addr = riu->raddr;
- wr.rkey = riu->rkey;
- wr.wr.num_sge = riu->sge_cnt;
- wr.wr.sg_list = riu->sge;
+ for (i = 0; i < n_rdma; i++) {
+ struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
- /* only get completion event for the last rdma write */
- if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
- wr.wr.send_flags = IB_SEND_SIGNALED;
+ wr->opcode = (dir == DMA_FROM_DEVICE) ?
+ IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
- ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
- if (ret)
- break;
+ if (i == n_rdma - 1) {
+ /* only get completion event for the last rdma read */
+ if (dir == DMA_TO_DEVICE) {
+ wr->send_flags = IB_SEND_SIGNALED;
+ ioctx->rdma_cqe.done = srpt_rdma_read_done;
+ } else {
+ ioctx->rdma_cqe.done = srpt_rdma_write_done;
+ }
+ wr->wr_cqe = &ioctx->rdma_cqe;
+ wr->next = NULL;
+ } else {
+ wr->wr_cqe = NULL;
+ wr->next = &ioctx->rdma_wrs[i + 1].wr;
+ }
}
+ ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
if (ret)
pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
__func__, __LINE__, ret, i, n_rdma);
- if (ret && i > 0) {
- wr.wr.num_sge = 0;
- wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
- wr.wr.send_flags = IB_SEND_SIGNALED;
- while (ch->state == CH_LIVE &&
- ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
- pr_info("Trying to abort failed RDMA transfer [%d]\n",
- ioctx->ioctx.index);
- msleep(1000);
- }
- while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
- pr_info("Waiting until RDMA abort finished [%d]\n",
- ioctx->ioctx.index);
- msleep(1000);
- }
- }
out:
if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
atomic_add(n_rdma, &ch->sq_wr_avail);
@@ -3190,14 +3012,11 @@ static void srpt_add_one(struct ib_device *device)
init_waitqueue_head(&sdev->ch_releaseQ);
spin_lock_init(&sdev->spinlock);
- if (ib_query_device(device, &sdev->dev_attr))
- goto free_dev;
-
sdev->pd = ib_alloc_pd(device);
if (IS_ERR(sdev->pd))
goto free_dev;
- sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
+ sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr);
srq_attr.event_handler = srpt_srq_event;
srq_attr.srq_context = (void *)sdev;
@@ -3211,7 +3030,7 @@ static void srpt_add_one(struct ib_device *device)
goto err_pd;
pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
- __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
+ __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
device->name);
if (!srpt_service_guid)
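
In ib_srpt.c the per-transfer rdma_iu array becomes an array of struct ib_rdma_wr that is linked into one chain and posted with a single ib_post_send(); only the last WR carries the wr_cqe, IB_SEND_SIGNALED is requested only for RDMA READ chains (a successful WRITE needs no completion before the ordered response SEND), and CQ processing moves from a per-channel kernel thread to an IB_POLL_WORKQUEUE CQ. A stripped-down sketch of the chaining, assuming the wrs[] entries already hold their remote addresses, rkeys and SG lists:

/* sketch: chain n RDMA WRs and post them as one request */
static int my_post_rdma_chain(struct ib_qp *qp, struct ib_rdma_wr *wrs, int n,
			      struct ib_cqe *cqe, bool is_read)
{
	struct ib_send_wr *bad_wr;
	int i;

	for (i = 0; i < n; i++) {
		struct ib_send_wr *wr = &wrs[i].wr;

		wr->opcode = is_read ? IB_WR_RDMA_READ : IB_WR_RDMA_WRITE;

		if (i == n - 1) {
			/*
			 * Only a READ chain needs a success completion
			 * (the command may run only once the data is here);
			 * error completions are delivered either way.
			 */
			if (is_read)
				wr->send_flags = IB_SEND_SIGNALED;
			wr->wr_cqe = cqe;	/* ->done fires once for the whole chain */
			wr->next = NULL;
		} else {
			wr->wr_cqe = NULL;
			wr->next = &wrs[i + 1].wr;
		}
	}

	return ib_post_send(qp, &wrs[0].wr, &bad_wr);
}
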
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 5366e0a9fd6d..09037f2b0b51 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -128,36 +128,6 @@ enum {
DEFAULT_MAX_RDMA_SIZE = 65536,
};
-enum srpt_opcode {
- SRPT_RECV,
- SRPT_SEND,
- SRPT_RDMA_MID,
- SRPT_RDMA_ABORT,
- SRPT_RDMA_READ_LAST,
- SRPT_RDMA_WRITE_LAST,
-};
-
-static inline u64 encode_wr_id(u8 opcode, u32 idx)
-{
- return ((u64)opcode << 32) | idx;
-}
-static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
-{
- return wr_id >> 32;
-}
-static inline u32 idx_from_wr_id(u64 wr_id)
-{
- return (u32)wr_id;
-}
-
-struct rdma_iu {
- u64 raddr;
- u32 rkey;
- struct ib_sge *sge;
- u32 sge_cnt;
- int mem_id;
-};
-
/**
* enum srpt_command_state - SCSI command state managed by SRPT.
* @SRPT_STATE_NEW: New command arrived and is being processed.
@@ -189,6 +159,7 @@ enum srpt_command_state {
* @index: Index of the I/O context in its ioctx_ring array.
*/
struct srpt_ioctx {
+ struct ib_cqe cqe;
void *buf;
dma_addr_t dma;
uint32_t index;
@@ -215,32 +186,30 @@ struct srpt_recv_ioctx {
* @sg: Pointer to sg-list associated with this I/O context.
* @sg_cnt: SG-list size.
* @mapped_sg_count: ib_dma_map_sg() return value.
- * @n_rdma_ius: Number of elements in the rdma_ius array.
- * @rdma_ius: Array with information about the RDMA mapping.
+ * @n_rdma_wrs: Number of elements in the rdma_wrs array.
+ * @rdma_wrs: Array with information about the RDMA mapping.
* @tag: Tag of the received SRP information unit.
* @spinlock: Protects 'state'.
* @state: I/O context state.
- * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
- * the already initiated transfers have finished.
* @cmd: Target core command data structure.
* @sense_data: SCSI sense data.
*/
struct srpt_send_ioctx {
struct srpt_ioctx ioctx;
struct srpt_rdma_ch *ch;
- struct rdma_iu *rdma_ius;
+ struct ib_rdma_wr *rdma_wrs;
+ struct ib_cqe rdma_cqe;
struct srp_direct_buf *rbufs;
struct srp_direct_buf single_rbuf;
struct scatterlist *sg;
struct list_head free_list;
spinlock_t spinlock;
enum srpt_command_state state;
- bool rdma_aborted;
struct se_cmd cmd;
struct completion tx_done;
int sg_cnt;
int mapped_sg_count;
- u16 n_rdma_ius;
+ u16 n_rdma_wrs;
u8 n_rdma;
u8 n_rbuf;
bool queue_status_only;
@@ -267,9 +236,6 @@ enum rdma_ch_state {
/**
* struct srpt_rdma_ch - RDMA channel.
- * @wait_queue: Allows the kernel thread to wait for more work.
- * @thread: Kernel thread that processes the IB queues associated with
- * the channel.
* @cm_id: IB CM ID associated with the channel.
* @qp: IB queue pair used for communicating over this channel.
* @cq: IB completion queue for this channel.
@@ -288,7 +254,6 @@ enum rdma_ch_state {
* @free_list: Head of list with free send I/O contexts.
* @state: channel state. See also enum rdma_ch_state.
* @ioctx_ring: Send ring.
- * @wc: IB work completion array for srpt_process_completion().
* @list: Node for insertion in the srpt_device.rch_list list.
* @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
* list contains struct srpt_ioctx elements and is protected
@@ -299,8 +264,6 @@ enum rdma_ch_state {
* @release_done: Enables waiting for srpt_release_channel() completion.
*/
struct srpt_rdma_ch {
- wait_queue_head_t wait_queue;
- struct task_struct *thread;
struct ib_cm_id *cm_id;
struct ib_qp *qp;
struct ib_cq *cq;
@@ -317,7 +280,6 @@ struct srpt_rdma_ch {
struct list_head free_list;
enum rdma_ch_state state;
struct srpt_send_ioctx **ioctx_ring;
- struct ib_wc wc[16];
struct list_head list;
struct list_head cmd_wait_list;
struct se_session *sess;
@@ -377,8 +339,6 @@ struct srpt_port {
* @mr: L_Key (local key) with write access to all local memory.
* @srq: Per-HCA SRQ (shared receive queue).
* @cm_id: Connection identifier.
- * @dev_attr: Attributes of the InfiniBand device as obtained during the
- * ib_client.add() callback.
* @srq_size: SRQ size.
* @ioctx_ring: Per-HCA SRQ.
* @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
@@ -393,7 +353,6 @@ struct srpt_device {
struct ib_pd *pd;
struct ib_srq *srq;
struct ib_cm_id *cm_id;
- struct ib_device_attr dev_attr;
int srq_size;
struct srpt_recv_ioctx **ioctx_ring;
struct list_head rch_list;