author    Jiri Kosina <jkosina@suse.cz>  2014-11-20 16:42:02 +0300
committer Jiri Kosina <jkosina@suse.cz>  2014-11-20 16:42:02 +0300
commit    a02001086bbfb4da35d1228bebc2f1b442db455f (patch)
tree      62ab47936cef06fd08657ca5b6cd1df98c19be57 /drivers/infiniband
parent    eff264efeeb0898408e8c9df72d8a32621035bed (diff)
parent    fc14f9c1272f62c3e8d01300f52467c0d9af50f9 (diff)
Merge Linus' tree to be able to apply submitted patches to newer code than
the current trivial.git base
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/agent.c | 16
-rw-r--r--  drivers/infiniband/core/cm.c | 5
-rw-r--r--  drivers/infiniband/core/iwcm.c | 27
-rw-r--r--  drivers/infiniband/core/mad.c | 283
-rw-r--r--  drivers/infiniband/core/mad_priv.h | 3
-rw-r--r--  drivers/infiniband/core/sa_query.c | 2
-rw-r--r--  drivers/infiniband/core/umem.c | 19
-rw-r--r--  drivers/infiniband/core/user_mad.c | 188
-rw-r--r--  drivers/infiniband/core/uverbs.h | 1
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 95
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 6
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c | 4
-rw-r--r--  drivers/infiniband/hw/amso1100/c2.c | 6
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_cq.c | 7
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 32
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb4/ev.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c | 37
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h | 11
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_fs.c | 1
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mad.c | 14
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_user_pages.c | 6
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 217
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 5
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 91
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 68
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 8
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 91
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c | 18
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 6
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 219
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c | 6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c | 2
-rw-r--r--  drivers/infiniband/hw/nes/nes_hw.c | 12
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c | 5
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma.h | 26
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 49
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 252
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c | 95
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 533
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 52
-rw-r--r--  drivers/infiniband/hw/qib/qib_debugfs.c | 3
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c | 1
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c | 8
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_pages.c | 6
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c | 2
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 14
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_fs.c | 6
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 133
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 15
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 10
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c | 343
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h | 408
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c | 198
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c | 99
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 693
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c | 89
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 48
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 5
65 files changed, 3091 insertions, 1523 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 2bc7f5af64f4..f6d29614cb01 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -94,14 +94,14 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
port_priv = ib_get_agent_port(device, port_num);
if (!port_priv) {
- printk(KERN_ERR SPFX "Unable to find port agent\n");
+ dev_err(&device->dev, "Unable to find port agent\n");
return;
}
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
- printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n",
+ dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
PTR_ERR(ah));
return;
}
@@ -110,7 +110,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_KERNEL);
if (IS_ERR(send_buf)) {
- printk(KERN_ERR SPFX "ib_create_send_mad error\n");
+ dev_err(&device->dev, "ib_create_send_mad error\n");
goto err1;
}
@@ -125,7 +125,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
}
if (ib_post_send_mad(send_buf, NULL)) {
- printk(KERN_ERR SPFX "ib_post_send_mad error\n");
+ dev_err(&device->dev, "ib_post_send_mad error\n");
goto err2;
}
return;
@@ -151,7 +151,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
+ dev_err(&device->dev, "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
@@ -161,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
port_priv->agent[0] = ib_register_mad_agent(device, port_num,
IB_QPT_SMI, NULL, 0,
&agent_send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(port_priv->agent[0])) {
ret = PTR_ERR(port_priv->agent[0]);
goto error2;
@@ -172,7 +172,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
port_priv->agent[1] = ib_register_mad_agent(device, port_num,
IB_QPT_GSI, NULL, 0,
&agent_send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(port_priv->agent[1])) {
ret = PTR_ERR(port_priv->agent[1]);
goto error3;
@@ -202,7 +202,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_agent_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
- printk(KERN_ERR SPFX "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %d not found\n", port_num);
return -ENODEV;
}
list_del(&port_priv->port_list);
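
A note on the conversion above: dev_err() and friends take the emitting struct device and prefix each message with the driver and device name, which is why the static SPFX string literal can go away. A minimal sketch of the pattern, assuming a struct ib_device *device in scope:

    /* before: fixed prefix, no indication of which HCA logged it */
    printk(KERN_ERR SPFX "Unable to find port agent\n");

    /* after: the core prints "<device name>: Unable to find port agent",
     * tying the message to the offending device */
    dev_err(&device->dev, "Unable to find port agent\n");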
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c3239170d8b7..e28a494e2a3a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3753,7 +3753,7 @@ static void cm_add_one(struct ib_device *ib_device)
struct cm_port *port;
struct ib_mad_reg_req reg_req = {
.mgmt_class = IB_MGMT_CLASS_CM,
- .mgmt_class_version = IB_CM_CLASS_VERSION
+ .mgmt_class_version = IB_CM_CLASS_VERSION,
};
struct ib_port_modify port_modify = {
.set_port_cap_mask = IB_PORT_CM_SUP
@@ -3801,7 +3801,8 @@ static void cm_add_one(struct ib_device *ib_device)
0,
cm_send_handler,
cm_recv_handler,
- port);
+ port,
+ 0);
if (IS_ERR(port->mad_agent))
goto error2;
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 3d2e489ab732..ff9163dc1596 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -46,6 +46,7 @@
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/sysctl.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
@@ -65,6 +66,20 @@ struct iwcm_work {
struct list_head free_list;
};
+static unsigned int default_backlog = 256;
+
+static struct ctl_table_header *iwcm_ctl_table_hdr;
+static struct ctl_table iwcm_ctl_table[] = {
+ {
+ .procname = "default_backlog",
+ .data = &default_backlog,
+ .maxlen = sizeof(default_backlog),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
/*
* The following services provide a mechanism for pre-allocating iwcm_work
* elements. The design pre-allocates them based on the cm_id type:
@@ -425,6 +440,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ if (!backlog)
+ backlog = default_backlog;
+
ret = alloc_work_entries(cm_id_priv, backlog);
if (ret)
return ret;
@@ -1030,11 +1048,20 @@ static int __init iw_cm_init(void)
if (!iwcm_wq)
return -ENOMEM;
+ iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
+ iwcm_ctl_table);
+ if (!iwcm_ctl_table_hdr) {
+ pr_err("iw_cm: couldn't register sysctl paths\n");
+ destroy_workqueue(iwcm_wq);
+ return -ENOMEM;
+ }
+
return 0;
}
static void __exit iw_cm_cleanup(void)
{
+ unregister_net_sysctl_table(iwcm_ctl_table_hdr);
destroy_workqueue(iwcm_wq);
}
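
The new knob surfaces as /proc/sys/net/iw_cm/default_backlog and can be tuned at runtime, e.g. with sysctl -w net.iw_cm.default_backlog=128. A sketch of the consumer side in iw_cm_listen(), per the hunk above: a zero backlog from the caller now means "use the configured default":

    if (!backlog)
            backlog = default_backlog;  /* 256 unless overridden via sysctl */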
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ab31f136d04b..74c30f4c557e 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -33,6 +33,9 @@
* SOFTWARE.
*
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -195,7 +198,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
u8 rmpp_version,
ib_mad_send_handler send_handler,
ib_mad_recv_handler recv_handler,
- void *context)
+ void *context,
+ u32 registration_flags)
{
struct ib_mad_port_private *port_priv;
struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
@@ -211,68 +215,109 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate parameters */
qpn = get_spl_qp_index(qp_type);
- if (qpn == -1)
+ if (qpn == -1) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: invalid QP Type %d\n",
+ qp_type);
goto error1;
+ }
- if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
+ if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: invalid RMPP Version %u\n",
+ rmpp_version);
goto error1;
+ }
/* Validate MAD registration request if supplied */
if (mad_reg_req) {
- if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
+ if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: invalid Class Version %u\n",
+ mad_reg_req->mgmt_class_version);
goto error1;
- if (!recv_handler)
+ }
+ if (!recv_handler) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: no recv_handler\n");
goto error1;
+ }
if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
/*
* IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
* one in this range currently allowed
*/
if (mad_reg_req->mgmt_class !=
- IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
} else if (mad_reg_req->mgmt_class == 0) {
/*
* Class 0 is reserved in IBA and is used for
* aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
*/
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid Mgmt Class 0\n");
goto error1;
} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
/*
* If class is in "new" vendor range,
* ensure supplied OUI is not zero
*/
- if (!is_vendor_oui(mad_reg_req->oui))
+ if (!is_vendor_oui(mad_reg_req->oui)) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: No OUI specified for class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
}
/* Make sure class supplied is consistent with RMPP */
if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
- if (rmpp_version)
+ if (rmpp_version) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
}
+
/* Make sure class supplied is consistent with QP type */
if (qp_type == IB_QPT_SMI) {
if ((mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
(mad_reg_req->mgmt_class !=
- IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
} else {
if ((mad_reg_req->mgmt_class ==
IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
(mad_reg_req->mgmt_class ==
- IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
+ mad_reg_req->mgmt_class);
goto error1;
+ }
}
} else {
/* No registration request supplied */
if (!send_handler)
goto error1;
+ if (registration_flags & IB_MAD_USER_RMPP)
+ goto error1;
}
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
+ dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
ret = ERR_PTR(-ENODEV);
goto error1;
}
@@ -280,6 +325,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Verify the QP requested is supported. For example, Ethernet devices
* will not have QP0 */
if (!port_priv->qp_info[qpn].qp) {
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: QP %d not supported\n", qpn);
ret = ERR_PTR(-EPROTONOSUPPORT);
goto error1;
}
@@ -316,6 +363,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
mad_agent_priv->agent.context = context;
mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
mad_agent_priv->agent.port_num = port_num;
+ mad_agent_priv->agent.flags = registration_flags;
spin_lock_init(&mad_agent_priv->lock);
INIT_LIST_HEAD(&mad_agent_priv->send_list);
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
@@ -706,7 +754,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
IB_SMI_DISCARD) {
ret = -EINVAL;
- printk(KERN_ERR PFX "Invalid directed route\n");
+ dev_err(&device->dev, "Invalid directed route\n");
goto out;
}
@@ -718,7 +766,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local = kmalloc(sizeof *local, GFP_ATOMIC);
if (!local) {
ret = -ENOMEM;
- printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
+ dev_err(&device->dev, "No memory for ib_mad_local_private\n");
goto out;
}
local->mad_priv = NULL;
@@ -726,7 +774,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
if (!mad_priv) {
ret = -ENOMEM;
- printk(KERN_ERR PFX "No memory for local response MAD\n");
+ dev_err(&device->dev, "No memory for local response MAD\n");
kfree(local);
goto out;
}
@@ -837,9 +885,9 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
if (!seg) {
- printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
- "alloc failed for len %zd, gfp %#x\n",
- sizeof (*seg) + seg_size, gfp_mask);
+ dev_err(&send_buf->mad_agent->device->dev,
+ "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
+ sizeof (*seg) + seg_size, gfp_mask);
free_send_rmpp_list(send_wr);
return -ENOMEM;
}
@@ -862,6 +910,12 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
return 0;
}
+int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent)
+{
+ return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
+}
+EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
+
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
u32 remote_qpn, u16 pkey_index,
int rmpp_active,
@@ -878,10 +932,12 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
pad = get_pad_size(hdr_len, data_len);
message_size = hdr_len + data_len + pad;
- if ((!mad_agent->rmpp_version &&
- (rmpp_active || message_size > sizeof(struct ib_mad))) ||
- (!rmpp_active && message_size > sizeof(struct ib_mad)))
- return ERR_PTR(-EINVAL);
+ if (ib_mad_kernel_rmpp_agent(mad_agent)) {
+ if (!rmpp_active && message_size > sizeof(struct ib_mad))
+ return ERR_PTR(-EINVAL);
+ } else
+ if (rmpp_active || message_size > sizeof(struct ib_mad))
+ return ERR_PTR(-EINVAL);
size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
@@ -1135,7 +1191,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
&mad_agent_priv->send_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- if (mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
ret = ib_send_rmpp_mad(mad_send_wr);
if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
ret = ib_send_mad(mad_send_wr);
@@ -1199,7 +1255,8 @@ EXPORT_SYMBOL(ib_redirect_mad_qp);
int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
struct ib_wc *wc)
{
- printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
+ dev_err(&mad_agent->device->dev,
+ "ib_process_mad_wc() not implemented yet\n");
return 0;
}
EXPORT_SYMBOL(ib_process_mad_wc);
@@ -1211,7 +1268,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
if ((*method)->agent[i]) {
- printk(KERN_ERR PFX "Method %d already in use\n", i);
+ pr_err("Method %d already in use\n", i);
return -EINVAL;
}
}
@@ -1223,8 +1280,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
/* Allocate management method table */
*method = kzalloc(sizeof **method, GFP_ATOMIC);
if (!*method) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_method_table\n");
+ pr_err("No memory for ib_mad_mgmt_method_table\n");
return -ENOMEM;
}
@@ -1319,8 +1375,8 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate management class table for "new" class version */
*class = kzalloc(sizeof **class, GFP_ATOMIC);
if (!*class) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_class_table\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_class_table\n");
ret = -ENOMEM;
goto error1;
}
@@ -1386,8 +1442,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate mgmt vendor class table for "new" class version */
vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
if (!vendor) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_vendor_class_table\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_vendor_class_table\n");
goto error1;
}
@@ -1397,8 +1453,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate table for this management vendor class */
vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
if (!vendor_class) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_vendor_class\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_vendor_class\n");
goto error2;
}
@@ -1429,7 +1485,7 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
goto check_in_use;
}
}
- printk(KERN_ERR PFX "All OUI slots in use\n");
+ dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
goto error3;
check_in_use:
@@ -1640,9 +1696,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
if (mad_agent->agent.recv_handler)
atomic_inc(&mad_agent->refcount);
else {
- printk(KERN_NOTICE PFX "No receive handler for client "
- "%p on port %d\n",
- &mad_agent->agent, port_priv->port_num);
+ dev_notice(&port_priv->device->dev,
+ "No receive handler for client %p on port %d\n",
+ &mad_agent->agent, port_priv->port_num);
mad_agent = NULL;
}
}
@@ -1658,8 +1714,8 @@ static int validate_mad(struct ib_mad *mad, u32 qp_num)
/* Make sure MAD base version is understood */
if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
- printk(KERN_ERR PFX "MAD received with unsupported base "
- "version %d\n", mad->mad_hdr.base_version);
+ pr_err("MAD received with unsupported base version %d\n",
+ mad->mad_hdr.base_version);
goto out;
}
@@ -1685,6 +1741,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
return !mad_agent_priv->agent.rmpp_version ||
+ !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE) ||
(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
@@ -1812,7 +1869,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
- if (mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
mad_recv_wc);
if (!mad_recv_wc) {
@@ -1827,23 +1884,39 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
if (!mad_send_wr) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- ib_free_recv_mad(mad_recv_wc);
- deref_mad_agent(mad_agent_priv);
- return;
- }
- ib_mark_mad_done(mad_send_wr);
- spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
+ && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
+ && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
+ & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ /* user rmpp is in effect
+ * and this is an active RMPP MAD
+ */
+ mad_recv_wc->wc->wr_id = 0;
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_recv_wc);
+ atomic_dec(&mad_agent_priv->refcount);
+ } else {
+ /* not user rmpp, revert to normal behavior and
+ * drop the mad */
+ ib_free_recv_mad(mad_recv_wc);
+ deref_mad_agent(mad_agent_priv);
+ return;
+ }
+ } else {
+ ib_mark_mad_done(mad_send_wr);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- /* Defined behavior is to complete response before request */
- mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
- mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ /* Defined behavior is to complete response before request */
+ mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_recv_wc);
+ atomic_dec(&mad_agent_priv->refcount);
- mad_send_wc.status = IB_WC_SUCCESS;
- mad_send_wc.vendor_err = 0;
- mad_send_wc.send_buf = &mad_send_wr->send_buf;
- ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ mad_send_wc.status = IB_WC_SUCCESS;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ }
} else {
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
mad_recv_wc);
@@ -1911,8 +1984,8 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
if (!response) {
- printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
- "for response buffer\n");
+ dev_err(&port_priv->device->dev,
+ "ib_mad_recv_done_handler no memory for response buffer\n");
goto out;
}
@@ -2083,7 +2156,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
mad_agent_priv = mad_send_wr->mad_agent_priv;
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- if (mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
if (ret == IB_RMPP_RESULT_CONSUMED)
goto done;
@@ -2176,7 +2249,8 @@ retry:
ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
&bad_send_wr);
if (ret) {
- printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "ib_post_send failed: %d\n", ret);
mad_send_wr = queued_send_wr;
wc->status = IB_WC_LOC_QP_OP_ERR;
goto retry;
@@ -2248,8 +2322,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
IB_QP_STATE | IB_QP_CUR_STATE);
kfree(attr);
if (ret)
- printk(KERN_ERR PFX "mad_error_handler - "
- "ib_modify_qp to RTS : %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "mad_error_handler - ib_modify_qp to RTS : %d\n",
+ ret);
else
mark_sends_for_retry(qp_info);
}
@@ -2408,7 +2483,8 @@ static void local_completions(struct work_struct *work)
if (local->mad_priv) {
recv_mad_agent = local->recv_mad_agent;
if (!recv_mad_agent) {
- printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
+ dev_err(&mad_agent_priv->agent.device->dev,
+ "No receive MAD agent for local completion\n");
free_mad = 1;
goto local_send_completion;
}
@@ -2476,7 +2552,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
- if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
+ if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
ret = ib_retry_rmpp(mad_send_wr);
switch (ret) {
case IB_RMPP_RESULT_UNHANDLED:
@@ -2589,7 +2665,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
} else {
mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
if (!mad_priv) {
- printk(KERN_ERR PFX "No memory for receive buffer\n");
+ dev_err(&qp_info->port_priv->device->dev,
+ "No memory for receive buffer\n");
ret = -ENOMEM;
break;
}
@@ -2625,7 +2702,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
sizeof mad_priv->header,
DMA_FROM_DEVICE);
kmem_cache_free(ib_mad_cache, mad_priv);
- printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
+ dev_err(&qp_info->port_priv->device->dev,
+ "ib_post_recv failed: %d\n", ret);
break;
}
} while (post);
@@ -2681,7 +2759,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
- printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
+ dev_err(&port_priv->device->dev,
+ "Couldn't kmalloc ib_qp_attr\n");
return -ENOMEM;
}
@@ -2705,16 +2784,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
ret = ib_modify_qp(qp, attr, IB_QP_STATE |
IB_QP_PKEY_INDEX | IB_QP_QKEY);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "INIT: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to INIT: %d\n",
+ i, ret);
goto out;
}
attr->qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, attr, IB_QP_STATE);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "RTR: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to RTR: %d\n",
+ i, ret);
goto out;
}
@@ -2722,16 +2803,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
attr->sq_psn = IB_MAD_SEND_Q_PSN;
ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "RTS: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to RTS: %d\n",
+ i, ret);
goto out;
}
}
ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
if (ret) {
- printk(KERN_ERR PFX "Failed to request completion "
- "notification: %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "Failed to request completion notification: %d\n",
+ ret);
goto out;
}
@@ -2741,7 +2824,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
if (ret) {
- printk(KERN_ERR PFX "Couldn't post receive WRs\n");
+ dev_err(&port_priv->device->dev,
+ "Couldn't post receive WRs\n");
goto out;
}
}
@@ -2755,7 +2839,8 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
struct ib_mad_qp_info *qp_info = qp_context;
/* It's worse than that! He's dead, Jim! */
- printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
+ dev_err(&qp_info->port_priv->device->dev,
+ "Fatal error (%d) on MAD QP (%d)\n",
event->event, qp_info->qp->qp_num);
}
@@ -2801,8 +2886,9 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
qp_init_attr.event_handler = qp_event_handler;
qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
if (IS_ERR(qp_info->qp)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
- get_spl_qp_index(qp_type));
+ dev_err(&qp_info->port_priv->device->dev,
+ "Couldn't create ib_mad QP%d\n",
+ get_spl_qp_index(qp_type));
ret = PTR_ERR(qp_info->qp);
goto error;
}
@@ -2840,7 +2926,7 @@ static int ib_mad_port_open(struct ib_device *device,
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
+ dev_err(&device->dev, "No memory for ib_mad_port_private\n");
return -ENOMEM;
}
@@ -2860,21 +2946,21 @@ static int ib_mad_port_open(struct ib_device *device,
ib_mad_thread_completion_handler,
NULL, port_priv, cq_size, 0);
if (IS_ERR(port_priv->cq)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
+ dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
goto error3;
}
port_priv->pd = ib_alloc_pd(device);
if (IS_ERR(port_priv->pd)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
+ dev_err(&device->dev, "Couldn't create ib_mad PD\n");
ret = PTR_ERR(port_priv->pd);
goto error4;
}
port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(port_priv->mr)) {
- printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
+ dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n");
ret = PTR_ERR(port_priv->mr);
goto error5;
}
@@ -2902,7 +2988,7 @@ static int ib_mad_port_open(struct ib_device *device,
ret = ib_mad_port_start(port_priv);
if (ret) {
- printk(KERN_ERR PFX "Couldn't start port\n");
+ dev_err(&device->dev, "Couldn't start port\n");
goto error9;
}
@@ -2946,7 +3032,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_mad_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- printk(KERN_ERR PFX "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %d not found\n", port_num);
return -ENODEV;
}
list_del_init(&port_priv->port_list);
@@ -2984,14 +3070,12 @@ static void ib_mad_init_device(struct ib_device *device)
for (i = start; i <= end; i++) {
if (ib_mad_port_open(device, i)) {
- printk(KERN_ERR PFX "Couldn't open %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
if (ib_agent_port_open(device, i)) {
- printk(KERN_ERR PFX "Couldn't open %s port %d "
- "for agents\n",
- device->name, i);
+ dev_err(&device->dev,
+ "Couldn't open port %d for agents\n", i);
goto error_agent;
}
}
@@ -2999,20 +3083,17 @@ static void ib_mad_init_device(struct ib_device *device)
error_agent:
if (ib_mad_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't close port %d\n", i);
error:
i--;
while (i >= start) {
if (ib_agent_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d "
- "for agents\n",
- device->name, i);
+ dev_err(&device->dev,
+ "Couldn't close port %d for agents\n", i);
if (ib_mad_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't close port %d\n", i);
i--;
}
}
@@ -3033,12 +3114,12 @@ static void ib_mad_remove_device(struct ib_device *device)
}
for (i = 0; i < num_ports; i++, cur_port++) {
if (ib_agent_port_close(device, cur_port))
- printk(KERN_ERR PFX "Couldn't close %s port %d "
- "for agents\n",
- device->name, cur_port);
+ dev_err(&device->dev,
+ "Couldn't close port %d for agents\n",
+ cur_port);
if (ib_mad_port_close(device, cur_port))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, cur_port);
+ dev_err(&device->dev, "Couldn't close port %d\n",
+ cur_port);
}
}
@@ -3064,7 +3145,7 @@ static int __init ib_mad_init_module(void)
SLAB_HWCACHE_ALIGN,
NULL);
if (!ib_mad_cache) {
- printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
+ pr_err("Couldn't create ib_mad cache\n");
ret = -ENOMEM;
goto error1;
}
@@ -3072,7 +3153,7 @@ static int __init ib_mad_init_module(void)
INIT_LIST_HEAD(&ib_mad_port_list);
if (ib_register_client(&mad_client)) {
- printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
+ pr_err("Couldn't register ib_mad client\n");
ret = -EINVAL;
goto error2;
}
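
The new ib_mad_kernel_rmpp_agent() helper centralizes the question "does the kernel do RMPP for this agent?", and the call sites above replace the old bare agent.rmpp_version checks with it. A sketch of the resulting decision table:

    /* rmpp_version == 0                     -> class does no RMPP          */
    /* rmpp_version != 0, !IB_MAD_USER_RMPP  -> kernel segments/reassembles */
    /* rmpp_version != 0,  IB_MAD_USER_RMPP  -> userspace handles RMPP;     */
    /*                                          kernel passes MADs through  */
    if (ib_mad_kernel_rmpp_agent(agent))
            ; /* run ib_send_rmpp_mad() / ib_process_rmpp_recv_wc() */
    else
            ; /* hand raw (possibly RMPP-flagged) MADs to the handler */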
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4969c5..d1a0b0ee9444 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -42,9 +42,6 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
-
-#define PFX "ib_mad: "
-
#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
/* QP and CQ parameters */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 233eaf541f55..c38f030f0dc9 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1184,7 +1184,7 @@ static void ib_sa_add_one(struct ib_device *device)
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,
- recv_handler, sa_dev);
+ recv_handler, sa_dev, 0);
if (IS_ERR(sa_dev->port[i].agent))
goto err;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index a3a2e9c1639b..df0c4f605a21 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -105,6 +105,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem->length = size;
umem->offset = addr & ~PAGE_MASK;
umem->page_size = PAGE_SIZE;
+ umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
@@ -198,6 +199,7 @@ out:
if (ret < 0) {
if (need_release)
__ib_umem_release(context->device, umem, 0);
+ put_pid(umem->pid);
kfree(umem);
} else
current->mm->pinned_vm = locked;
@@ -230,15 +232,19 @@ void ib_umem_release(struct ib_umem *umem)
{
struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
+ struct task_struct *task;
unsigned long diff;
__ib_umem_release(umem->context->device, umem, 1);
- mm = get_task_mm(current);
- if (!mm) {
- kfree(umem);
- return;
- }
+ task = get_pid_task(umem->pid, PIDTYPE_PID);
+ put_pid(umem->pid);
+ if (!task)
+ goto out;
+ mm = get_task_mm(task);
+ put_task_struct(task);
+ if (!mm)
+ goto out;
diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
@@ -262,9 +268,10 @@ void ib_umem_release(struct ib_umem *umem)
} else
down_write(&mm->mmap_sem);
- current->mm->pinned_vm -= diff;
+ mm->pinned_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
+out:
kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
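
The umem fix above records the pinning task's struct pid at ib_umem_get() time so that ib_umem_release() can charge pinned_vm back to the owner's mm even when release runs in a different process context. A condensed sketch of the lookup chain used in the patch:

    struct pid *pid = get_task_pid(current, PIDTYPE_PID);   /* at pin time */
    /* ... later, at release time ... */
    struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
    put_pid(pid);
    if (task) {
            struct mm_struct *mm = get_task_mm(task);
            put_task_struct(task);
            if (mm) {
                    down_write(&mm->mmap_sem);
                    mm->pinned_vm -= diff;  /* the owner's mm, not current->mm */
                    up_write(&mm->mmap_sem);
                    mmput(mm);
            }
    }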
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1acb99100556..928cdd20e2d1 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -33,6 +33,8 @@
* SOFTWARE.
*/
+#define pr_fmt(fmt) "user_mad: " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
@@ -504,13 +506,15 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
- if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) {
- copy_offset = IB_MGMT_MAD_HDR;
- rmpp_active = 0;
- } else {
+
+ if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+ && ib_mad_kernel_rmpp_agent(agent)) {
copy_offset = IB_MGMT_RMPP_HDR;
rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
- IB_MGMT_RMPP_FLAG_ACTIVE;
+ IB_MGMT_RMPP_FLAG_ACTIVE;
+ } else {
+ copy_offset = IB_MGMT_MAD_HDR;
+ rmpp_active = 0;
}
data_len = count - hdr_size(file) - hdr_len;
@@ -556,14 +560,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
rmpp_mad->mad_hdr.tid = *tid;
}
- spin_lock_irq(&file->send_lock);
- ret = is_duplicate(file, packet);
- if (!ret)
+ if (!ib_mad_kernel_rmpp_agent(agent)
+ && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+ && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ spin_lock_irq(&file->send_lock);
list_add_tail(&packet->list, &file->send_list);
- spin_unlock_irq(&file->send_lock);
- if (ret) {
- ret = -EINVAL;
- goto err_msg;
+ spin_unlock_irq(&file->send_lock);
+ } else {
+ spin_lock_irq(&file->send_lock);
+ ret = is_duplicate(file, packet);
+ if (!ret)
+ list_add_tail(&packet->list, &file->send_list);
+ spin_unlock_irq(&file->send_lock);
+ if (ret) {
+ ret = -EINVAL;
+ goto err_msg;
+ }
}
ret = ib_post_send_mad(packet->msg, NULL);
@@ -614,6 +626,8 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent: invalid device\n");
ret = -EPIPE;
goto out;
}
@@ -624,6 +638,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent: invalid QPN %d specified\n",
+ ureq.qpn);
ret = -EINVAL;
goto out;
}
@@ -632,11 +649,15 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (!__get_agent(file, agent_id))
goto found;
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent: Max Agents (%u) reached\n",
+ IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
goto out;
found:
if (ureq.mgmt_class) {
+ memset(&req, 0, sizeof(req));
req.mgmt_class = ureq.mgmt_class;
req.mgmt_class_version = ureq.mgmt_class_version;
memcpy(req.oui, ureq.oui, sizeof req.oui);
@@ -657,7 +678,7 @@ found:
ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
ureq.mgmt_class ? &req : NULL,
ureq.rmpp_version,
- send_handler, recv_handler, file);
+ send_handler, recv_handler, file, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
agent = NULL;
@@ -673,10 +694,11 @@ found:
if (!file->already_used) {
file->already_used = 1;
if (!file->use_pkey_index) {
- printk(KERN_WARNING "user_mad: process %s did not enable "
- "P_Key index support.\n", current->comm);
- printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
- "has info on the new ABI.\n");
+ dev_warn(file->port->dev,
+ "process %s did not enable P_Key index support.\n",
+ current->comm);
+ dev_warn(file->port->dev,
+ " Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
}
}
@@ -694,6 +716,119 @@ out:
return ret;
}
+static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
+{
+ struct ib_user_mad_reg_req2 ureq;
+ struct ib_mad_reg_req req;
+ struct ib_mad_agent *agent = NULL;
+ int agent_id;
+ int ret;
+
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
+
+ if (!file->port->ib_dev) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2: invalid device\n");
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (copy_from_user(&ureq, arg, sizeof(ureq))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (ureq.qpn != 0 && ureq.qpn != 1) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2: invalid QPN %d specified\n",
+ ureq.qpn);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
+ ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
+ ret = -EINVAL;
+
+ if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
+ (u32 __user *) (arg + offsetof(struct
+ ib_user_mad_reg_req2, flags))))
+ ret = -EFAULT;
+
+ goto out;
+ }
+
+ for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
+ if (!__get_agent(file, agent_id))
+ goto found;
+
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2: Max Agents (%u) reached\n",
+ IB_UMAD_MAX_AGENTS);
+ ret = -ENOMEM;
+ goto out;
+
+found:
+ if (ureq.mgmt_class) {
+ memset(&req, 0, sizeof(req));
+ req.mgmt_class = ureq.mgmt_class;
+ req.mgmt_class_version = ureq.mgmt_class_version;
+ if (ureq.oui & 0xff000000) {
+ dev_notice(file->port->dev,
+ "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
+ ureq.oui);
+ ret = -EINVAL;
+ goto out;
+ }
+ req.oui[2] = ureq.oui & 0x0000ff;
+ req.oui[1] = (ureq.oui & 0x00ff00) >> 8;
+ req.oui[0] = (ureq.oui & 0xff0000) >> 16;
+ memcpy(req.method_mask, ureq.method_mask,
+ sizeof(req.method_mask));
+ }
+
+ agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
+ ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
+ ureq.mgmt_class ? &req : NULL,
+ ureq.rmpp_version,
+ send_handler, recv_handler, file,
+ ureq.flags);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ agent = NULL;
+ goto out;
+ }
+
+ if (put_user(agent_id,
+ (u32 __user *)(arg +
+ offsetof(struct ib_user_mad_reg_req2, id)))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (!file->already_used) {
+ file->already_used = 1;
+ file->use_pkey_index = 1;
+ }
+
+ file->agent[agent_id] = agent;
+ ret = 0;
+
+out:
+ mutex_unlock(&file->mutex);
+
+ if (ret && agent)
+ ib_unregister_mad_agent(agent);
+
+ mutex_unlock(&file->port->file_mutex);
+
+ return ret;
+}
+
+
static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
{
struct ib_mad_agent *agent = NULL;
@@ -749,6 +884,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
case IB_USER_MAD_ENABLE_PKEY:
return ib_umad_enable_pkey(filp->private_data);
+ case IB_USER_MAD_REGISTER_AGENT2:
+ return ib_umad_reg_agent2(filp->private_data, (void __user *) arg);
default:
return -ENOIOCTLCMD;
}
@@ -765,6 +902,8 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
case IB_USER_MAD_ENABLE_PKEY:
return ib_umad_enable_pkey(filp->private_data);
+ case IB_USER_MAD_REGISTER_AGENT2:
+ return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg));
default:
return -ENOIOCTLCMD;
}
@@ -983,7 +1122,7 @@ static CLASS_ATTR_STRING(abi_version, S_IRUGO,
static dev_t overflow_maj;
static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
-static int find_overflow_devnum(void)
+static int find_overflow_devnum(struct ib_device *device)
{
int ret;
@@ -991,7 +1130,8 @@ static int find_overflow_devnum(void)
ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+ dev_err(&device->dev,
+ "couldn't register dynamic device number\n");
return ret;
}
}
@@ -1014,7 +1154,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
if (devnum >= IB_UMAD_MAX_PORTS) {
spin_unlock(&port_lock);
- devnum = find_overflow_devnum();
+ devnum = find_overflow_devnum(device);
if (devnum < 0)
return -1;
@@ -1200,14 +1340,14 @@ static int __init ib_umad_init(void)
ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register device number\n");
+ pr_err("couldn't register device number\n");
goto out;
}
umad_class = class_create(THIS_MODULE, "infiniband_mad");
if (IS_ERR(umad_class)) {
ret = PTR_ERR(umad_class);
- printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
+ pr_err("couldn't create class infiniband_mad\n");
goto out_chrdev;
}
@@ -1215,13 +1355,13 @@ static int __init ib_umad_init(void)
ret = class_create_file(umad_class, &class_attr_abi_version.attr);
if (ret) {
- printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
+ pr_err("couldn't create abi_version attribute\n");
goto out_class;
}
ret = ib_register_client(&umad_client);
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
+ pr_err("couldn't register ib_umad client\n");
goto out_class;
}
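
ib_umad_reg_agent2() above takes the vendor OUI from userspace as a host-order u32 and splits it into the most-significant-byte-first 3-byte array that struct ib_mad_reg_req expects. A standalone sketch of that packing (the OUI value is a made-up example):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t oui = 0x0002c9;            /* 24-bit OUI, host order */
            uint8_t b[3];

            b[0] = (oui & 0xff0000) >> 16;      /* most significant byte first */
            b[1] = (oui & 0x00ff00) >> 8;
            b[2] =  oui & 0x0000ff;

            printf("%02x:%02x:%02x\n", b[0], b[1], b[2]);  /* 00:02:c9 */
            return 0;
    }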
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a283274a5a09..643c08a025a5 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -221,6 +221,7 @@ IB_UVERBS_DECLARE_CMD(query_port);
IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(rereg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
IB_UVERBS_DECLARE_CMD(alloc_mw);
IB_UVERBS_DECLARE_CMD(dealloc_mw);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ea6203ee7bcc..5ba2a86aab6a 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1002,6 +1002,99 @@ err_free:
return ret;
}
+ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_rereg_mr cmd;
+ struct ib_uverbs_rereg_mr_resp resp;
+ struct ib_udata udata;
+ struct ib_pd *pd = NULL;
+ struct ib_mr *mr;
+ struct ib_pd *old_pd;
+ int ret;
+ struct ib_uobject *uobj;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd), out_len - sizeof(resp));
+
+ if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+ return -EINVAL;
+
+ if ((cmd.flags & IB_MR_REREG_TRANS) &&
+ (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
+ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
+ return -EINVAL;
+
+ uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
+ file->ucontext);
+
+ if (!uobj)
+ return -EINVAL;
+
+ mr = uobj->object;
+
+ if (cmd.flags & IB_MR_REREG_ACCESS) {
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ goto put_uobjs;
+ }
+
+ if (cmd.flags & IB_MR_REREG_PD) {
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto put_uobjs;
+ }
+ }
+
+ if (atomic_read(&mr->usecnt)) {
+ ret = -EBUSY;
+ goto put_uobj_pd;
+ }
+
+ old_pd = mr->pd;
+ ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
+ cmd.length, cmd.hca_va,
+ cmd.access_flags, pd, &udata);
+ if (!ret) {
+ if (cmd.flags & IB_MR_REREG_PD) {
+ atomic_inc(&pd->usecnt);
+ mr->pd = pd;
+ atomic_dec(&old_pd->usecnt);
+ }
+ } else {
+ goto put_uobj_pd;
+ }
+
+ memset(&resp, 0, sizeof(resp));
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+ else
+ ret = in_len;
+
+put_uobj_pd:
+ if (cmd.flags & IB_MR_REREG_PD)
+ put_pd_read(pd);
+
+put_uobjs:
+
+ put_uobj_write(mr->uobject);
+
+ return ret;
+}
+
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -2425,6 +2518,8 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
attr.grh.sgid_index = cmd.attr.grh.sgid_index;
attr.grh.hop_limit = cmd.attr.grh.hop_limit;
attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+ attr.vlan_id = 0;
+ memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
ah = ib_create_ah(pd, &attr);
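
ib_uverbs_rereg_mr() above gates the operation on a strict flags check before touching the MR. A condensed sketch of the validation, using the flag names from the patch:

    /* reject unknown bits, and a rereg that changes nothing */
    if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
            return -EINVAL;

    /* a translation change needs a non-empty range whose start address
     * and iova agree on the offset within the page */
    if ((cmd.flags & IB_MR_REREG_TRANS) &&
        (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
         (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
            return -EINVAL;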
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 08219fb3338b..71ab83fde472 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -87,6 +87,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
[IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
[IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
[IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
[IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
[IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
@@ -476,6 +477,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
entry->desc.async.element = element;
entry->desc.async.event_type = event;
+ entry->desc.async.reserved = 0;
entry->counter = counter;
list_add_tail(&entry->list, &file->async_file->event_list);
@@ -501,6 +503,10 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
struct ib_uevent_object *uobj;
+ /* for XRC target qp's, check that qp is live */
+ if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+ return;
+
uobj = container_of(event->element.qp->uobject,
struct ib_uevent_object, uobject);
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index e7bee46868d1..abd97247443e 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -140,5 +140,9 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
dst->packet_life_time = src->packet_life_time;
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
+
+ memset(dst->smac, 0, sizeof(dst->smac));
+ memset(dst->dmac, 0, sizeof(dst->dmac));
+ dst->vlan_id = 0xffff;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 00400c352c1a..766a71ccefed 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -604,16 +604,14 @@ static int c2_up(struct net_device *netdev)
tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
c2_port->mem_size = tx_size + rx_size;
- c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
- &c2_port->dma);
+ c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
+ &c2_port->dma);
if (c2_port->mem == NULL) {
pr_debug("Unable to allocate memory for "
"host descriptor rings\n");
return -ENOMEM;
}
- memset(c2_port->mem, 0, c2_port->mem_size);
-
/* Create the Rx host descriptor ring */
if ((ret =
c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
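
pci_zalloc_consistent() used above is pci_alloc_consistent() plus the zeroing, so the explicit memset() after a successful allocation can be dropped. A before/after sketch:

    /* before: allocate, then zero by hand */
    mem = pci_alloc_consistent(pdev, size, &dma);
    if (mem)
            memset(mem, 0, size);

    /* after: returns zeroed memory (or NULL) in one call */
    mem = pci_zalloc_consistent(pdev, size, &dma);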
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index 49e0e8533f74..1b63185b4ad4 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -260,11 +260,14 @@ static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
mq->msg_pool.host, dma_unmap_addr(mq, mapping));
}
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
- int msg_size)
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
+ size_t q_size, size_t msg_size)
{
u8 *pool_start;
+ if (q_size > SIZE_MAX / msg_size)
+ return -EINVAL;
+
pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
&mq->host_dma, GFP_KERNEL);
if (!pool_start)
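
The q_size > SIZE_MAX / msg_size test above rejects a q_size * msg_size product that would wrap before reaching dma_alloc_coherent(). A standalone sketch of the idiom (the kernel version can omit the zero check because msg_size is never zero there):

    #include <stdint.h>
    #include <stddef.h>

    /* nonzero if a * b would overflow size_t */
    static int mul_overflows(size_t a, size_t b)
    {
            return b && a > SIZE_MAX / b;
    }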
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index c2fb71c182a8..fb61f6685809 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -236,10 +236,12 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
static void set_emss(struct c4iw_ep *ep, u16 opt)
{
ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] -
- sizeof(struct iphdr) - sizeof(struct tcphdr);
+ ((AF_INET == ep->com.remote_addr.ss_family) ?
+ sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
+ sizeof(struct tcphdr);
ep->mss = ep->emss;
if (GET_TCPOPT_TSTAMP(opt))
- ep->emss -= 12;
+ ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
if (ep->emss < 128)
ep->emss = 128;
if (ep->emss & 7)
@@ -415,6 +417,7 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
return NULL;
if (!our_interface(dev, n->dev) &&
!(n->dev->flags & IFF_LOOPBACK)) {
+ neigh_release(n);
dst_release(&rt->dst);
return NULL;
}
@@ -581,11 +584,14 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep,
}
static void best_mtu(const unsigned short *mtus, unsigned short mtu,
- unsigned int *idx, int use_ts)
+ unsigned int *idx, int use_ts, int ipv6)
{
- unsigned short hdr_size = sizeof(struct iphdr) +
+ unsigned short hdr_size = (ipv6 ?
+ sizeof(struct ipv6hdr) :
+ sizeof(struct iphdr)) +
sizeof(struct tcphdr) +
- (use_ts ? 12 : 0);
+ (use_ts ?
+ round_up(TCPOLEN_TIMESTAMP, 4) : 0);
unsigned short data_size = mtu - hdr_size;
cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
@@ -634,7 +640,8 @@ static int send_connect(struct c4iw_ep *ep)
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps);
+ enable_tcp_timestamps,
+ (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
wscale = compute_wscale(rcv_win);
/*
@@ -668,6 +675,7 @@ static int send_connect(struct c4iw_ep *ep)
if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
opt2 |= T5_OPT_2_VALID;
opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
}
t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
@@ -713,8 +721,6 @@ static int send_connect(struct c4iw_ep *ep)
} else {
u32 isn = (prandom_u32() & ~7UL) - 1;
- opt2 |= T5_OPT_2_VALID;
- opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
if (peer2peer)
isn += 4;
@@ -756,10 +762,10 @@ static int send_connect(struct c4iw_ep *ep)
t5_req6->peer_ip_lo = *((__be64 *)
(ra6->sin6_addr.s6_addr + 8));
t5_req6->opt0 = cpu_to_be64(opt0);
- t5_req6->params = (__force __be64)cpu_to_be32(
+ t5_req6->params = cpu_to_be64(V_FILTER_TUPLE(
cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
- ep->l2t));
+ ep->l2t)));
t5_req6->rsvd = cpu_to_be32(isn);
PDBG("%s snd_isn %u\n", __func__,
be32_to_cpu(t5_req6->rsvd));
@@ -1763,7 +1769,8 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
req->tcb.tx_max = (__force __be32) jiffies;
req->tcb.rcv_adv = htons(1);
best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps);
+ enable_tcp_timestamps,
+ (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
wscale = compute_wscale(rcv_win);
/*
@@ -2162,7 +2169,8 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
ep->hwtid));
best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps && req->tcpopt.tstamp);
+ enable_tcp_timestamps && req->tcpopt.tstamp,
+ (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
wscale = compute_wscale(rcv_win);
/*
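
The reworked set_emss()/best_mtu() above size the headers per address family and replace the magic 12 with the padded TCP timestamp option length. A sketch of the arithmetic, assuming the usual sizes (20-byte IPv4 header, 40-byte IPv6, 20-byte TCP, 10-byte timestamp option rounded up to 12):

    hdr_size = (ipv6 ? sizeof(struct ipv6hdr)     /* 40 */
                     : sizeof(struct iphdr))      /* 20 */
               + sizeof(struct tcphdr);           /* 20 */
    emss = path_mtu - hdr_size;
    if (use_timestamps)
            emss -= round_up(TCPOLEN_TIMESTAMP, 4);   /* 10 -> 12 */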
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index f25df5276c22..72f1f052e88c 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -60,7 +60,7 @@ int c4iw_wr_log = 0;
module_param(c4iw_wr_log, int, 0444);
MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data.");
-int c4iw_wr_log_size_order = 12;
+static int c4iw_wr_log_size_order = 12;
module_param(c4iw_wr_log_size_order, int, 0444);
MODULE_PARM_DESC(c4iw_wr_log_size_order,
"Number of entries (log2) in the work request timing log.");
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index fbe6051af254..c9df0549f51d 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -227,6 +227,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
chp = get_chp(dev, qid);
if (chp) {
+ t4_clear_cq_armed(&chp->cq);
spin_lock_irqsave(&chp->comp_handler_lock, flag);
(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index c158fcc02bca..41cd6882b648 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1105,7 +1105,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
struct c4iw_cq *schp)
{
int count;
- int flushed;
+ int rq_flushed, sq_flushed;
unsigned long flag;
PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
@@ -1123,27 +1123,40 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
c4iw_flush_hw_cq(rchp);
c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
- flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+ rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&rchp->lock, flag);
- if (flushed) {
- spin_lock_irqsave(&rchp->comp_handler_lock, flag);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
- spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
- }
/* locking hierarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&schp->lock, flag);
spin_lock(&qhp->lock);
if (schp != rchp)
c4iw_flush_hw_cq(schp);
- flushed = c4iw_flush_sq(qhp);
+ sq_flushed = c4iw_flush_sq(qhp);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&schp->lock, flag);
- if (flushed) {
- spin_lock_irqsave(&schp->comp_handler_lock, flag);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
- spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+
+ if (schp == rchp) {
+ if (t4_clear_cq_armed(&rchp->cq) &&
+ (rq_flushed || sq_flushed)) {
+ spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq,
+ rchp->ibcq.cq_context);
+ spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+ }
+ } else {
+ if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
+ spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq,
+ rchp->ibcq.cq_context);
+ spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+ }
+ if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
+ spin_lock_irqsave(&schp->comp_handler_lock, flag);
+ (*schp->ibcq.comp_handler)(&schp->ibcq,
+ schp->ibcq.cq_context);
+ spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+ }
}
}
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index df5edfa31a8f..c04e5134b30c 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -524,6 +524,10 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq)
return !wq->rq.queue[wq->rq.size].status.db_off;
}
+enum t4_cq_flags {
+ CQ_ARMED = 1,
+};
+
struct t4_cq {
struct t4_cqe *queue;
dma_addr_t dma_addr;
@@ -544,12 +548,19 @@ struct t4_cq {
u16 cidx_inc;
u8 gen;
u8 error;
+ unsigned long flags;
};
+static inline int t4_clear_cq_armed(struct t4_cq *cq)
+{
+ return test_and_clear_bit(CQ_ARMED, &cq->flags);
+}
+
static inline int t4_arm_cq(struct t4_cq *cq, int se)
{
u32 val;
+ set_bit(CQ_ARMED, &cq->flags);
while (cq->cidx_inc > CIDXINC_MASK) {
val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) |
INGRESSQID(cq->cqid);
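A minimal sketch of the pattern the two cxgb4 hunks above depend on (illustrative names, not from the patch): t4_arm_cq() records the arm with an atomic set_bit(), and every notification path must win test_and_clear_bit() before calling the completion handler, so one arm yields exactly one upcall even when the interrupt path and the flush path race.

#include <linux/bitops.h>

struct demo_cq {
	unsigned long flags;			/* bit 0 == "armed", as in t4_cq */
};

static void demo_arm(struct demo_cq *cq)
{
	set_bit(0, &cq->flags);			/* consumer re-arms before the doorbell */
}

static void demo_notify(struct demo_cq *cq, void (*upcall)(void *), void *ctx)
{
	if (test_and_clear_bit(0, &cq->flags))	/* first caller after arming wins */
		upcall(ctx);
}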
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index e0c404bdc4a8..4977082e081f 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -82,7 +82,6 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- *dentry = NULL;
mutex_lock(&parent->d_inode->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 43f2d0424d4f..e890e5ba0e01 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -726,7 +726,7 @@ bail:
* @dd: the infinipath device
* @pkeys: the PKEY table
*/
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
{
struct ipath_portdata *pd;
int i;
@@ -759,6 +759,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
}
if (changed) {
u64 pkey;
+ struct ib_event event;
pkey = (u64) dd->ipath_pkeys[0] |
((u64) dd->ipath_pkeys[1] << 16) |
@@ -768,12 +769,17 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
(unsigned long long) pkey);
ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
pkey);
+
+ event.event = IB_EVENT_PKEY_CHANGE;
+ event.device = &dd->verbs_dev->ibdev;
+ event.element.port_num = port;
+ ib_dispatch_event(&event);
}
return 0;
}
static int recv_subn_set_pkeytable(struct ib_smp *smp,
- struct ib_device *ibdev)
+ struct ib_device *ibdev, u8 port)
{
u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
__be16 *p = (__be16 *) smp->data;
@@ -784,7 +790,7 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
for (i = 0; i < n; i++)
q[i] = be16_to_cpu(p[i]);
- if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
+ if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
smp->status |= IB_SMP_INVALID_FIELD;
return recv_subn_get_pkeytable(smp, ibdev);
@@ -1342,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
ret = recv_subn_set_portinfo(smp, ibdev, port_num);
goto bail;
case IB_SMP_ATTR_PKEY_TABLE:
- ret = recv_subn_set_pkeytable(smp, ibdev);
+ ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
goto bail;
case IB_SMP_ATTR_SM_INFO:
if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
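The point of threading the port number through set_pkeys() is the asynchronous event above; a minimal sketch of the same dispatch in isolation (helper name is illustrative):

#include <rdma/ib_verbs.h>

/* Raise IB_EVENT_PKEY_CHANGE for one port after the hardware PKEY
 * table has been rewritten; ib_dispatch_event() fans the event out to
 * every registered handler (e.g. the core caches and ULPs). */
static void demo_pkey_changed(struct ib_device *ibdev, u8 port)
{
	struct ib_event event;

	event.event = IB_EVENT_PKEY_CHANGE;
	event.device = ibdev;
	event.element.port_num = port;
	ib_dispatch_event(&event);
}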
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index dc66c4506916..1da1252dcdb3 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -54,7 +54,7 @@ static void __ipath_release_user_pages(struct page **p, size_t num_pages,
/* call with current->mm->mmap_sem held */
static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
- struct page **p, struct vm_area_struct **vma)
+ struct page **p)
{
unsigned long lock_limit;
size_t got;
@@ -74,7 +74,7 @@ static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
ret = get_user_pages(current, current->mm,
start_page + got * PAGE_SIZE,
num_pages - got, 1, 1,
- p + got, vma);
+ p + got, NULL);
if (ret < 0)
goto bail_release;
}
@@ -165,7 +165,7 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
down_write(&current->mm->mmap_sem);
- ret = __ipath_get_user_pages(start_page, num_pages, p, NULL);
+ ret = __ipath_get_user_pages(start_page, num_pages, p);
up_write(&current->mm->mmap_sem);
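The vma output array of get_user_pages() is optional; since this caller never consumed it, the cleanup passes NULL directly instead of threading an unused parameter through. A sketch with the 3.x-era eight-argument signature used above:

#include <linux/mm.h>
#include <linux/sched.h>

static long demo_pin_pages(unsigned long start, unsigned long npages,
			   struct page **pages)
{
	long ret;

	down_write(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm, start, npages,
			     1 /* write */, 1 /* force */,
			     pages, NULL /* vmas: not needed */);
	up_write(&current->mm->mmap_sem);
	return ret;
}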
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 287ad0564acd..82a7dd87089b 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -891,7 +891,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
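The new trailing 0 fills a parameter added to ib_register_mad_agent() in this window — to the best of my reading, a registration-flags word — and existing send-only agents pass 0 to keep their previous behaviour. A sketch of the resulting call shape:

#include <rdma/ib_mad.h>

static struct ib_mad_agent *demo_register_agent(struct ib_device *dev,
						u8 port, int is_gsi,
						ib_mad_send_handler send_handler)
{
	return ib_register_mad_agent(dev, port,
				     is_gsi ? IB_QPT_GSI : IB_QPT_SMI,
				     NULL /* mad_reg_req */,
				     0 /* rmpp_version */,
				     send_handler,
				     NULL /* recv_handler */,
				     NULL /* context */,
				     0 /* registration flags: none */);
}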
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0f7027e7db13..8b72cf392b34 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -59,6 +59,7 @@
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
+#define MLX4_IB_CARD_REV_A0 0xA0
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -119,6 +120,17 @@ static int check_flow_steering_support(struct mlx4_dev *dev)
return dmfs;
}
+static int num_ib_ports(struct mlx4_dev *dev)
+{
+ int ib_ports = 0;
+ int i;
+
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_ports++;
+
+ return ib_ports;
+}
+
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
@@ -126,6 +138,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
+ int have_ib_ports;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -142,6 +155,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
memset(props, 0, sizeof *props);
+ have_ib_ports = num_ib_ports(dev->dev);
+
props->fw_ver = dev->dev->caps.fw_ver;
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
@@ -152,13 +167,15 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
- if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
- if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
+ if (dev->dev->caps.max_gso_sz &&
+ (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
+ (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
@@ -357,7 +374,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
- spin_lock(&iboe->lock);
+ spin_lock_bh(&iboe->lock);
ndev = iboe->netdevs[port - 1];
if (!ndev)
goto out_unlock;
@@ -369,7 +386,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
IB_PORT_ACTIVE : IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
out_unlock:
- spin_unlock(&iboe->lock);
+ spin_unlock_bh(&iboe->lock);
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
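All iboe->lock sites in this file move from spin_lock() to spin_lock_bh(), which is the usual fix when a lock becomes reachable from bottom-half context: process-context holders must disable BHs, or a softirq arriving on the same CPU can spin forever on the lock they hold. A generic sketch of the rule:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

static void demo_process_context(void)
{
	spin_lock_bh(&demo_lock);	/* BHs off: no softirq can preempt us
					 * on this CPU while the lock is held */
	/* ... touch state shared with bottom-half code ... */
	spin_unlock_bh(&demo_lock);
}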
@@ -811,11 +828,11 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
if (!mqp->port)
return 0;
- spin_lock(&mdev->iboe.lock);
+ spin_lock_bh(&mdev->iboe.lock);
ndev = mdev->iboe.netdevs[mqp->port - 1];
if (ndev)
dev_hold(ndev);
- spin_unlock(&mdev->iboe.lock);
+ spin_unlock_bh(&mdev->iboe.lock);
if (ndev) {
ret = 1;
@@ -910,8 +927,7 @@ static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
const struct default_rules *pdefault_rules = default_table;
u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
- for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
- pdefault_rules++) {
+ for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
memset(&field_types, 0, sizeof(field_types));
@@ -965,8 +981,7 @@ static int __mlx4_ib_create_default_rules(
int size = 0;
int i;
- for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
- sizeof(pdefault_rules->rules_create_list[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
int ret;
union ib_flow_spec ib_spec;
switch (pdefault_rules->rules_create_list[i]) {
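Both loops above swap the open-coded sizeof division for ARRAY_SIZE(), which is equivalent for true arrays but also refuses to compile when handed a pointer. A sketch:

#include <linux/kernel.h>

static const int demo_table[] = { 10, 20, 30 };

static int demo_count(void)
{
	/* ARRAY_SIZE(x) == sizeof(x) / sizeof((x)[0]), plus a build-time
	 * check that x really is an array; the raw division silently
	 * returns nonsense for a pointer argument. */
	return ARRAY_SIZE(demo_table);	/* 3 */
}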
@@ -1091,6 +1106,30 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
return err;
}
+static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
+ u64 *reg_id)
+{
+ void *ib_flow;
+ union ib_flow_spec *ib_spec;
+ struct mlx4_dev *dev = to_mdev(qp->device)->dev;
+ int err = 0;
+
+ if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ return 0; /* do nothing */
+
+ ib_flow = flow_attr + 1;
+ ib_spec = (union ib_flow_spec *)ib_flow;
+
+ if (ib_spec->type != IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
+ return 0; /* do nothing */
+
+ err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
+ flow_attr->port, qp->qp_num,
+ MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
+ reg_id);
+ return err;
+}
+
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
@@ -1134,12 +1173,24 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
&mflow->reg_id[i]);
if (err)
- goto err_free;
+ goto err_create_flow;
+ i++;
+ }
+
+ if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]);
+ if (err)
+ goto err_create_flow;
i++;
}
return &mflow->ibflow;
+err_create_flow:
+ while (i) {
+ (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]);
+ i--;
+ }
err_free:
kfree(mflow);
return ERR_PTR(err);
@@ -1264,11 +1315,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
mutex_lock(&mqp->mutex);
ge = find_gid_entry(mqp, gid->raw);
if (ge) {
- spin_lock(&mdev->iboe.lock);
+ spin_lock_bh(&mdev->iboe.lock);
ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
if (ndev)
dev_hold(ndev);
- spin_unlock(&mdev->iboe.lock);
+ spin_unlock_bh(&mdev->iboe.lock);
if (ndev)
dev_put(ndev);
list_del(&ge->list);
@@ -1389,6 +1440,9 @@ static void update_gids_task(struct work_struct *work)
int err;
struct mlx4_dev *dev = gw->dev->dev;
+ if (!gw->dev->ib_active)
+ return;
+
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
@@ -1419,6 +1473,9 @@ static void reset_gids_task(struct work_struct *work)
int err;
struct mlx4_dev *dev = gw->dev->dev;
+ if (!gw->dev->ib_active)
+ return;
+
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
pr_warn("reset gid table failed\n");
@@ -1553,7 +1610,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
return 0;
iboe = &ibdev->iboe;
- spin_lock(&iboe->lock);
+ spin_lock_bh(&iboe->lock);
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
if ((netif_is_bond_master(real_dev) &&
@@ -1563,7 +1620,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
update_gid_table(ibdev, port, gid,
event == NETDEV_DOWN, 0);
- spin_unlock(&iboe->lock);
+ spin_unlock_bh(&iboe->lock);
return 0;
}
@@ -1636,13 +1693,21 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
new_smac = mlx4_mac_to_u64(dev->dev_addr);
read_unlock(&dev_base_lock);
+ atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
+
+ /* no need for update QP1 and mac registration in non-SRIOV */
+ if (!mlx4_is_mfunc(ibdev->dev))
+ return;
+
mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
qp = ibdev->qp1_proxy[port - 1];
if (qp) {
int new_smac_index;
- u64 old_smac = qp->pri.smac;
+ u64 old_smac;
struct mlx4_update_qp_params update_params;
+ mutex_lock(&qp->mutex);
+ old_smac = qp->pri.smac;
if (new_smac == old_smac)
goto unlock;
@@ -1652,22 +1717,25 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
goto unlock;
update_params.smac_index = new_smac_index;
- if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC,
+ if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
&update_params)) {
release_mac = new_smac;
goto unlock;
}
-
+ /* if old port was zero, no mac was yet registered for this QP */
+ if (qp->pri.smac_port)
+ release_mac = old_smac;
qp->pri.smac = new_smac;
+ qp->pri.smac_port = port;
qp->pri.smac_index = new_smac_index;
-
- release_mac = old_smac;
}
unlock:
- mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
if (release_mac != MLX4_IB_INVALID_MAC)
mlx4_unregister_mac(ibdev->dev, port, release_mac);
+ if (qp)
+ mutex_unlock(&qp->mutex);
+ mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
}
static void mlx4_ib_get_dev_addr(struct net_device *dev,
@@ -1678,6 +1746,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev,
struct inet6_dev *in6_dev;
union ib_gid *pgid;
struct inet6_ifaddr *ifp;
+ union ib_gid default_gid;
#endif
union ib_gid gid;
@@ -1698,12 +1767,15 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev,
in_dev_put(in_dev);
}
#if IS_ENABLED(CONFIG_IPV6)
+ mlx4_make_default_gid(dev, &default_gid);
/* IPv6 gids */
in6_dev = in6_dev_get(dev);
if (in6_dev) {
read_lock_bh(&in6_dev->lock);
list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
pgid = (union ib_gid *)&ifp->addr;
+ if (!memcmp(pgid, &default_gid, sizeof(*pgid)))
+ continue;
update_gid_table(ibdev, port, pgid, 0, 0);
}
read_unlock_bh(&in6_dev->lock);
@@ -1725,24 +1797,33 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
struct net_device *dev;
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
int i;
+ int err = 0;
- for (i = 1; i <= ibdev->num_ports; ++i)
- if (reset_gid_table(ibdev, i))
- return -1;
+ for (i = 1; i <= ibdev->num_ports; ++i) {
+ if (rdma_port_get_link_layer(&ibdev->ib_dev, i) ==
+ IB_LINK_LAYER_ETHERNET) {
+ err = reset_gid_table(ibdev, i);
+ if (err)
+ goto out;
+ }
+ }
read_lock(&dev_base_lock);
- spin_lock(&iboe->lock);
+ spin_lock_bh(&iboe->lock);
for_each_netdev(&init_net, dev) {
u8 port = mlx4_ib_get_dev_port(dev, ibdev);
- if (port)
+ /* port will be non-zero only for ETH ports */
+ if (port) {
+ mlx4_ib_set_default_gid(ibdev, dev, port);
mlx4_ib_get_dev_addr(dev, ibdev, port);
+ }
}
- spin_unlock(&iboe->lock);
+ spin_unlock_bh(&iboe->lock);
read_unlock(&dev_base_lock);
-
- return 0;
+out:
+ return err;
}
static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
@@ -1756,7 +1837,7 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
iboe = &ibdev->iboe;
- spin_lock(&iboe->lock);
+ spin_lock_bh(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
enum ib_port_state port_state = IB_PORT_NOP;
struct net_device *old_master = iboe->masters[port - 1];
@@ -1788,35 +1869,47 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ?
IB_PORT_ACTIVE : IB_PORT_DOWN;
mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
- } else {
- reset_gid_table(ibdev, port);
- }
- /* if using bonding/team and a slave port is down, we don't the bond IP
- * based gids in the table since flows that select port by gid may get
- * the down port.
- */
- if (curr_master && (port_state == IB_PORT_DOWN)) {
- reset_gid_table(ibdev, port);
- mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
- }
- /* if bonding is used it is possible that we add it to masters
- * only after IP address is assigned to the net bonding
- * interface.
- */
- if (curr_master && (old_master != curr_master)) {
- reset_gid_table(ibdev, port);
- mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
- mlx4_ib_get_dev_addr(curr_master, ibdev, port);
- }
+ if (curr_master) {
+ /* if using bonding/team and a slave port is down, we
+ * don't want the bond IP based gids in the table since
+ * flows that select port by gid may get the down port.
+ */
+ if (port_state == IB_PORT_DOWN) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev,
+ curr_netdev,
+ port);
+ } else {
+ /* gids from the upper dev (bond/team)
+ * should appear in port's gid table
+ */
+ mlx4_ib_get_dev_addr(curr_master,
+ ibdev, port);
+ }
+ }
+ /* if bonding is used it is possible that we add it to
+ * masters only after IP address is assigned to the
+ * net bonding interface.
+ */
+ if (curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev,
+ curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_master, ibdev, port);
+ }
- if (!curr_master && (old_master != curr_master)) {
+ if (!curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev,
+ curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
+ }
+ } else {
reset_gid_table(ibdev, port);
- mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
- mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
}
}
- spin_unlock(&iboe->lock);
+ spin_unlock_bh(&iboe->lock);
if (update_qps_port > 0)
mlx4_ib_update_qps(ibdev, dev, update_qps_port);
@@ -2007,6 +2100,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_REREG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
@@ -2059,6 +2153,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
+ ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
@@ -2156,6 +2251,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_steer_free_bitmap;
}
+ for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
+ atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
+
if (ib_register_device(&ibdev->ib_dev, NULL))
goto err_steer_free_bitmap;
@@ -2192,12 +2290,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
}
}
#endif
- for (i = 1 ; i <= ibdev->num_ports ; ++i)
- reset_gid_table(ibdev, i);
- rtnl_lock();
- mlx4_ib_scan_netdevs(ibdev, NULL, 0);
- rtnl_unlock();
- mlx4_ib_init_gid_table(ibdev);
+ if (mlx4_ib_init_gid_table(ibdev))
+ goto err_notif;
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -2345,6 +2439,9 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
struct mlx4_ib_dev *ibdev = ibdev_ptr;
int p;
+ ibdev->ib_active = false;
+ flush_workqueue(wq);
+
mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
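A recurring theme in the mlx4 changes above is the per-port atomic64 MAC cache: probe seeds iboe.mac[] from caps.def_mac, netdev events republish with atomic64_set(), and hot paths read with atomic64_read() instead of chasing iboe.netdevs[] under a lock. A minimal sketch of the publish/consume pair (names illustrative):

#include <linux/atomic.h>

static atomic64_t demo_port_mac;	/* a 48-bit MAC fits in a u64 */

static void demo_publish_mac(u64 new_mac)	/* netdev notifier side */
{
	atomic64_set(&demo_port_mac, new_mac);
}

static u64 demo_read_mac(void)			/* lock-free fast path */
{
	return atomic64_read(&demo_port_mac);
}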
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 369da3ca5d64..6eb743f65f6f 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -451,6 +451,7 @@ struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
struct net_device *masters[MLX4_MAX_PORTS];
+ atomic64_t mac[MLX4_MAX_PORTS];
struct notifier_block nb;
struct notifier_block nb_inet;
struct notifier_block nb_inet6;
@@ -788,5 +789,9 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
+int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+ u64 start, u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index cb2a8727f3fb..8f9325cfc85d 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -144,8 +144,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
+ /* Force registering the memory as writable. */
+ /* Used for memory re-registration. HCA protects the access */
mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
+ access_flags | IB_ACCESS_LOCAL_WRITE, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
@@ -183,6 +185,93 @@ err_free:
return ERR_PTR(err);
}
+int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+ u64 start, u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(mr->device);
+ struct mlx4_ib_mr *mmr = to_mmr(mr);
+ struct mlx4_mpt_entry *mpt_entry;
+ struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
+ int err;
+
+ /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
+ * we assume that the calls can't run concurrently. Otherwise, a
+ * race exists.
+ */
+ err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
+
+ if (err)
+ return err;
+
+ if (flags & IB_MR_REREG_PD) {
+ err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
+ to_mpd(pd)->pdn);
+
+ if (err)
+ goto release_mpt_entry;
+ }
+
+ if (flags & IB_MR_REREG_ACCESS) {
+ err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
+ convert_access(mr_access_flags));
+
+ if (err)
+ goto release_mpt_entry;
+ }
+
+ if (flags & IB_MR_REREG_TRANS) {
+ int shift;
+ int err;
+ int n;
+
+ mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
+ ib_umem_release(mmr->umem);
+ mmr->umem = ib_umem_get(mr->uobject->context, start, length,
+ mr_access_flags |
+ IB_ACCESS_LOCAL_WRITE,
+ 0);
+ if (IS_ERR(mmr->umem)) {
+ err = PTR_ERR(mmr->umem);
+ /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
+ mmr->umem = NULL;
+ goto release_mpt_entry;
+ }
+ n = ib_umem_page_count(mmr->umem);
+ shift = ilog2(mmr->umem->page_size);
+
+ err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
+ virt_addr, length, n, shift,
+ *pmpt_entry);
+ if (err) {
+ ib_umem_release(mmr->umem);
+ goto release_mpt_entry;
+ }
+ mmr->mmr.iova = virt_addr;
+ mmr->mmr.size = length;
+
+ err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
+ if (err) {
+ mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
+ ib_umem_release(mmr->umem);
+ goto release_mpt_entry;
+ }
+ }
+
+ /* If we couldn't transfer the MR to the HCA, just remember to
+ * return a failure. But dereg_mr will free the resources.
+ */
+ err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
+ if (!err && flags & IB_MR_REREG_ACCESS)
+ mmr->mmr.access = mr_access_flags;
+
+release_mpt_entry:
+ mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
+
+ return err;
+}
+
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
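The three IB_MR_REREG_* bits select which MR attributes a re-registration touches: the PD, the access rights, and/or the backing translation. A sketch of the verb's shape (invoking the device op directly is illustrative only — real callers arrive through the uverbs REREG_MR command):

#include <rdma/ib_verbs.h>

static int demo_rereg(struct ib_mr *mr, struct ib_pd *new_pd,
		      u64 start, u64 length, int access,
		      struct ib_udata *udata)
{
	int flags = IB_MR_REREG_PD |		/* move the MR to new_pd */
		    IB_MR_REREG_ACCESS |	/* update access rights */
		    IB_MR_REREG_TRANS;		/* re-pin [start, start+length) */

	return mr->device->rereg_user_mr(mr, flags, start, length,
					 start /* virt_addr */, access,
					 new_pd, udata);
}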
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 67780452f0cf..9c5150c3cb31 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -964,9 +964,10 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
pr_warn("modify QP %06x to RESET failed.\n",
qp->mqp.qpn);
- if (qp->pri.smac) {
+ if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) {
mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
qp->pri.smac = 0;
+ qp->pri.smac_port = 0;
}
if (qp->alt.smac) {
mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
@@ -1325,7 +1326,8 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
* If one was already assigned, but the new mac differs,
* unregister the old one and register the new one.
*/
- if (!smac_info->smac || smac_info->smac != smac) {
+ if ((!smac_info->smac && !smac_info->smac_port) ||
+ smac_info->smac != smac) {
/* register candidate now, unreg if needed, after success */
smac_index = mlx4_register_mac(dev->dev, port, smac);
if (smac_index >= 0) {
@@ -1390,21 +1392,13 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
struct mlx4_qp_context *context)
{
- struct net_device *ndev;
u64 u64_mac;
int smac_index;
-
- ndev = dev->iboe.netdevs[qp->port - 1];
- if (ndev) {
- smac = ndev->dev_addr;
- u64_mac = mlx4_mac_to_u64(smac);
- } else {
- u64_mac = dev->dev->caps.def_mac[qp->port];
- }
+ u64_mac = atomic64_read(&dev->iboe.mac[qp->port - 1]);
context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
- if (!qp->pri.smac) {
+ if (!qp->pri.smac && !qp->pri.smac_port) {
smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
if (smac_index >= 0) {
qp->pri.candidate_smac_index = smac_index;
@@ -1432,6 +1426,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
int steer_qp = 0;
int err = -EINVAL;
+ /* APM is not supported under RoCE */
+ if (attr_mask & IB_QP_ALT_PATH &&
+ rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET)
+ return -ENOTSUPP;
+
context = kzalloc(sizeof *context, GFP_KERNEL);
if (!context)
return -ENOMEM;
@@ -1677,9 +1677,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
MLX4_IB_LINK_TYPE_ETH;
+ if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ /* set QP to receive both tunneled & non-tunneled packets */
+ if (!(context->flags & cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET)))
+ context->srqn = cpu_to_be32(7 << 28);
+ }
+ }
if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
int is_eth = rdma_port_get_link_layer(
@@ -1780,9 +1786,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_reg(dev, qp, 0);
}
- if (qp->pri.smac) {
+ if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) {
mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
qp->pri.smac = 0;
+ qp->pri.smac_port = 0;
}
if (qp->alt.smac) {
mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
@@ -1806,11 +1813,12 @@ out:
if (err && steer_qp)
mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
- if (qp->pri.candidate_smac) {
+ if (qp->pri.candidate_smac ||
+ (!qp->pri.candidate_smac && qp->pri.candidate_smac_port)) {
if (err) {
mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
} else {
- if (qp->pri.smac)
+ if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port))
mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
qp->pri.smac = qp->pri.candidate_smac;
qp->pri.smac_index = qp->pri.candidate_smac_index;
@@ -2083,6 +2091,16 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
return 0;
}
+static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
+{
+ int i;
+
+ for (i = ETH_ALEN; i; i--) {
+ dst_mac[i - 1] = src_mac & 0xff;
+ src_mac >>= 8;
+ }
+}
+
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
@@ -2197,7 +2215,6 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
}
if (is_eth) {
- u8 *smac;
struct in6_addr in6;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
@@ -2210,12 +2227,17 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
- if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev))
- smac = to_mdev(sqp->qp.ibqp.device)->
- iboe.netdevs[sqp->qp.port - 1]->dev_addr;
- else /* use the src mac of the tunnel */
- smac = ah->av.eth.s_mac;
- memcpy(sqp->ud_header.eth.smac_h, smac, 6);
+ if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
+ u8 smac[ETH_ALEN];
+
+ mlx4_u64_to_smac(smac, mac);
+ memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
+ } else {
+ /* use the src mac of the tunnel */
+ memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
+ }
+
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
if (!is_vlan) {
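mlx4_u64_to_smac() is the unpacking half of the MAC cache: it emits the 48-bit value most-significant byte first, matching the packing order of mlx4_mac_to_u64(). A sketch of the round trip (the helper is file-local to qp.c, so this is illustrative):

static void demo_mac_roundtrip(struct mlx4_ib_dev *dev, int port,
			       u8 smac[ETH_ALEN])
{
	u64 mac = atomic64_read(&dev->iboe.mac[port - 1]);

	mlx4_u64_to_smac(smac, mac);	/* smac[0] holds the MSB, so
					 * 0x0002c9112233 becomes
					 * 00:02:c9:11:22:33 */
}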
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index e4056279166d..10cfce5119a9 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -752,7 +752,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
return ERR_PTR(-EINVAL);
entries = roundup_pow_of_two(entries + 1);
- if (entries > dev->mdev->caps.max_cqes)
+ if (entries > dev->mdev->caps.gen.max_cqes)
return ERR_PTR(-EINVAL);
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -919,7 +919,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
int err;
u32 fsel;
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
+ if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
return -ENOSYS;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -1074,7 +1074,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
int uninitialized_var(cqe_size);
unsigned long flags;
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
+ if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
pr_info("Firmware does not support resize CQ\n");
return -ENOSYS;
}
@@ -1083,7 +1083,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
return -EINVAL;
entries = roundup_pow_of_two(entries + 1);
- if (entries > dev->mdev->caps.max_cqes + 1)
+ if (entries > dev->mdev->caps.gen.max_cqes + 1)
return -EINVAL;
if (entries == ibcq->cqe + 1)
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index b514bbb5610f..657af9a1167c 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -129,7 +129,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
packet_error = be16_to_cpu(out_mad->status);
- dev->mdev->caps.ext_port_cap[port - 1] = (!err && !packet_error) ?
+ dev->mdev->caps.gen.ext_port_cap[port - 1] = (!err && !packet_error) ?
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
out:
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d8907b20522a..1ba6c42e4df8 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -157,11 +157,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ struct mlx5_general_caps *gen;
int err = -ENOMEM;
int max_rq_sg;
int max_sq_sg;
u64 flags;
+ gen = &dev->mdev->caps.gen;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!in_mad || !out_mad)
@@ -183,7 +185,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN;
- flags = dev->mdev->caps.flags;
+ flags = gen->flags;
if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -213,30 +215,31 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
props->max_mr_size = ~0ull;
- props->page_size_cap = dev->mdev->caps.min_page_sz;
- props->max_qp = 1 << dev->mdev->caps.log_max_qp;
- props->max_qp_wr = dev->mdev->caps.max_wqes;
- max_rq_sg = dev->mdev->caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
- max_sq_sg = (dev->mdev->caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
+ props->page_size_cap = gen->min_page_sz;
+ props->max_qp = 1 << gen->log_max_qp;
+ props->max_qp_wr = gen->max_wqes;
+ max_rq_sg = gen->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
+ max_sq_sg = (gen->max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
- props->max_cq = 1 << dev->mdev->caps.log_max_cq;
- props->max_cqe = dev->mdev->caps.max_cqes - 1;
- props->max_mr = 1 << dev->mdev->caps.log_max_mkey;
- props->max_pd = 1 << dev->mdev->caps.log_max_pd;
- props->max_qp_rd_atom = dev->mdev->caps.max_ra_req_qp;
- props->max_qp_init_rd_atom = dev->mdev->caps.max_ra_res_qp;
+ props->max_cq = 1 << gen->log_max_cq;
+ props->max_cqe = gen->max_cqes - 1;
+ props->max_mr = 1 << gen->log_max_mkey;
+ props->max_pd = 1 << gen->log_max_pd;
+ props->max_qp_rd_atom = 1 << gen->log_max_ra_req_qp;
+ props->max_qp_init_rd_atom = 1 << gen->log_max_ra_res_qp;
+ props->max_srq = 1 << gen->log_max_srq;
+ props->max_srq_wr = gen->max_srq_wqes - 1;
+ props->local_ca_ack_delay = gen->local_ca_ack_delay;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
- props->max_srq = 1 << dev->mdev->caps.log_max_srq;
- props->max_srq_wr = dev->mdev->caps.max_srq_wqes - 1;
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len = (unsigned int)-1;
- props->local_ca_ack_delay = dev->mdev->caps.local_ca_ack_delay;
+ props->local_ca_ack_delay = gen->local_ca_ack_delay;
props->atomic_cap = IB_ATOMIC_NONE;
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28));
- props->max_mcast_grp = 1 << dev->mdev->caps.log_max_mcg;
- props->max_mcast_qp_attach = dev->mdev->caps.max_qp_mcg;
+ props->max_mcast_grp = 1 << gen->log_max_mcg;
+ props->max_mcast_qp_attach = gen->max_qp_mcg;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
@@ -254,10 +257,12 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ struct mlx5_general_caps *gen;
int ext_active_speed;
int err = -ENOMEM;
- if (port < 1 || port > dev->mdev->caps.num_ports) {
+ gen = &dev->mdev->caps.gen;
+ if (port < 1 || port > gen->num_ports) {
mlx5_ib_warn(dev, "invalid port number %d\n", port);
return -EINVAL;
}
@@ -288,8 +293,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
props->gid_tbl_len = out_mad->data[50];
- props->max_msg_sz = 1 << to_mdev(ibdev)->mdev->caps.log_max_msg;
- props->pkey_tbl_len = to_mdev(ibdev)->mdev->caps.port[port - 1].pkey_table_len;
+ props->max_msg_sz = 1 << gen->log_max_msg;
+ props->pkey_tbl_len = gen->port[port - 1].pkey_table_len;
props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
@@ -316,7 +321,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == 4) {
- if (dev->mdev->caps.ext_port_cap[port - 1] &
+ if (gen->ext_port_cap[port - 1] &
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
@@ -470,6 +475,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct mlx5_ib_alloc_ucontext_req_v2 req;
struct mlx5_ib_alloc_ucontext_resp resp;
struct mlx5_ib_ucontext *context;
+ struct mlx5_general_caps *gen;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
int gross_uuars;
@@ -480,6 +486,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
int i;
size_t reqlen;
+ gen = &dev->mdev->caps.gen;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
@@ -512,14 +519,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
- resp.qp_tab_size = 1 << dev->mdev->caps.log_max_qp;
- resp.bf_reg_size = dev->mdev->caps.bf_reg_size;
+ resp.qp_tab_size = 1 << gen->log_max_qp;
+ resp.bf_reg_size = gen->bf_reg_size;
resp.cache_line_size = L1_CACHE_BYTES;
- resp.max_sq_desc_sz = dev->mdev->caps.max_sq_desc_sz;
- resp.max_rq_desc_sz = dev->mdev->caps.max_rq_desc_sz;
- resp.max_send_wqebb = dev->mdev->caps.max_wqes;
- resp.max_recv_wr = dev->mdev->caps.max_wqes;
- resp.max_srq_recv_wr = dev->mdev->caps.max_srq_wqes;
+ resp.max_sq_desc_sz = gen->max_sq_desc_sz;
+ resp.max_rq_desc_sz = gen->max_rq_desc_sz;
+ resp.max_send_wqebb = gen->max_wqes;
+ resp.max_recv_wr = gen->max_wqes;
+ resp.max_srq_recv_wr = gen->max_srq_wqes;
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
@@ -565,7 +572,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
mutex_init(&context->db_page_mutex);
resp.tot_uuars = req.total_num_uuars;
- resp.num_ports = dev->mdev->caps.num_ports;
+ resp.num_ports = gen->num_ports;
err = ib_copy_to_udata(udata, &resp,
sizeof(resp) - sizeof(resp.reserved));
if (err)
@@ -650,13 +657,13 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
return -EINVAL;
idx = get_index(vma->vm_pgoff);
+ if (idx >= uuari->num_uars)
+ return -EINVAL;
+
pfn = uar_index2pfn(dev, uuari->uars[idx].index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
(unsigned long long)pfn);
- if (idx >= uuari->num_uars)
- return -EINVAL;
-
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start, pfn,
PAGE_SIZE, vma->vm_page_prot))
@@ -967,9 +974,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
+ struct mlx5_general_caps *gen;
int port;
- for (port = 1; port <= dev->mdev->caps.num_ports; port++)
+ gen = &dev->mdev->caps.gen;
+ for (port = 1; port <= gen->num_ports; port++)
mlx5_query_ext_port_caps(dev, port);
}
@@ -977,9 +986,11 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
{
struct ib_device_attr *dprops = NULL;
struct ib_port_attr *pprops = NULL;
+ struct mlx5_general_caps *gen;
int err = 0;
int port;
+ gen = &dev->mdev->caps.gen;
pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
if (!pprops)
goto out;
@@ -994,14 +1005,14 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
goto out;
}
- for (port = 1; port <= dev->mdev->caps.num_ports; port++) {
+ for (port = 1; port <= gen->num_ports; port++) {
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err);
break;
}
- dev->mdev->caps.port[port - 1].pkey_table_len = dprops->max_pkeys;
- dev->mdev->caps.port[port - 1].gid_table_len = pprops->gid_tbl_len;
+ gen->port[port - 1].pkey_table_len = dprops->max_pkeys;
+ gen->port[port - 1].gid_table_len = pprops->gid_tbl_len;
mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
dprops->max_pkeys, pprops->gid_tbl_len);
}
@@ -1279,8 +1290,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
- dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey;
- dev->num_ports = mdev->caps.num_ports;
+ dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey;
+ dev->num_ports = mdev->caps.gen.num_ports;
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors = dev->num_comp_vectors;
dev->ib_dev.dma_device = &mdev->pdev->dev;
@@ -1355,7 +1366,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
+ if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
dev->ib_dev.uverbs_cmd_mask |=
@@ -1414,8 +1425,8 @@ err_dealloc:
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_ib_dev *dev = context;
- destroy_umrc_res(dev);
ib_unregister_device(&dev->ib_dev);
+ destroy_umrc_res(dev);
destroy_dev_resources(&dev->devr);
free_comp_eqs(dev);
ib_dealloc_device(&dev->ib_dev);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index a3e81444c825..dae07eae9507 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -55,16 +55,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
u64 pfn;
struct scatterlist *sg;
int entry;
+ unsigned long page_shift = ilog2(umem->page_size);
- addr = addr >> PAGE_SHIFT;
+ addr = addr >> page_shift;
tmp = (unsigned long)addr;
m = find_first_bit(&tmp, sizeof(tmp));
skip = 1 << m;
mask = skip - 1;
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> PAGE_SHIFT;
- pfn = sg_dma_address(sg) >> PAGE_SHIFT;
+ len = sg_dma_len(sg) >> page_shift;
+ pfn = sg_dma_address(sg) >> page_shift;
for (k = 0; k < len; k++) {
if (!(i & mask)) {
tmp = (unsigned long)pfn;
@@ -103,14 +104,15 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
*ncont = 0;
}
- *shift = PAGE_SHIFT + m;
+ *shift = page_shift + m;
*count = i;
}
void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, __be64 *pas, int umr)
{
- int shift = page_shift - PAGE_SHIFT;
+ unsigned long umem_page_shift = ilog2(umem->page_size);
+ int shift = page_shift - umem_page_shift;
int mask = (1 << shift) - 1;
int i, k;
u64 cur = 0;
@@ -121,11 +123,11 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> PAGE_SHIFT;
+ len = sg_dma_len(sg) >> umem_page_shift;
base = sg_dma_address(sg);
for (k = 0; k < len; k++) {
if (!(i & mask)) {
- cur = base + (k << PAGE_SHIFT);
+ cur = base + (k << umem_page_shift);
if (umr)
cur |= 3;
@@ -134,7 +136,7 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
i >> shift, be64_to_cpu(pas[i >> shift]));
} else
mlx5_ib_dbg(dev, "=====> 0x%llx\n",
- base + (k << PAGE_SHIFT));
+ base + (k << umem_page_shift));
i++;
}
}
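The mem.c fix derives shifts from the umem's own granule instead of assuming PAGE_SHIFT, which matters whenever umem->page_size differs from the CPU page size. A sketch of the derivation:

#include <linux/log2.h>
#include <rdma/ib_umem.h>

static unsigned long demo_umem_shift(struct ib_umem *umem)
{
	return ilog2(umem->page_size);	/* 4096 -> 12, 65536 -> 16 */
}

static u64 demo_first_pfn(struct ib_umem *umem, u64 dma_addr)
{
	/* page-frame number in umem granules, not CPU pages */
	return dma_addr >> demo_umem_shift(umem);
}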
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 80b3c63eab5d..8ee7cb46e059 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -881,12 +881,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int order;
int err;
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
- start, virt_addr, length);
+ mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, virt_addr, length, access_flags);
umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
0);
if (IS_ERR(umem)) {
- mlx5_ib_dbg(dev, "umem get failed\n");
+ mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
return (void *)umem;
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7efe6e3f3542..e261a53f9a02 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -158,11 +158,13 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
{
+ struct mlx5_general_caps *gen;
int wqe_size;
int wq_size;
+ gen = &dev->mdev->caps.gen;
/* Sanity check RQ size before proceeding */
- if (cap->max_recv_wr > dev->mdev->caps.max_wqes)
+ if (cap->max_recv_wr > gen->max_wqes)
return -EINVAL;
if (!has_rq) {
@@ -182,10 +184,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
qp->rq.wqe_cnt = wq_size / wqe_size;
- if (wqe_size > dev->mdev->caps.max_rq_desc_sz) {
+ if (wqe_size > gen->max_rq_desc_sz) {
mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
wqe_size,
- dev->mdev->caps.max_rq_desc_sz);
+ gen->max_rq_desc_sz);
return -EINVAL;
}
qp->rq.wqe_shift = ilog2(wqe_size);
@@ -266,9 +268,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
struct mlx5_ib_qp *qp)
{
+ struct mlx5_general_caps *gen;
int wqe_size;
int wq_size;
+ gen = &dev->mdev->caps.gen;
if (!attr->cap.max_send_wr)
return 0;
@@ -277,9 +281,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
if (wqe_size < 0)
return wqe_size;
- if (wqe_size > dev->mdev->caps.max_sq_desc_sz) {
+ if (wqe_size > gen->max_sq_desc_sz) {
mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
- wqe_size, dev->mdev->caps.max_sq_desc_sz);
+ wqe_size, gen->max_sq_desc_sz);
return -EINVAL;
}
@@ -292,9 +296,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
- if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) {
+ if (qp->sq.wqe_cnt > gen->max_wqes) {
mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
- qp->sq.wqe_cnt, dev->mdev->caps.max_wqes);
+ qp->sq.wqe_cnt, gen->max_wqes);
return -ENOMEM;
}
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
@@ -309,11 +313,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
struct mlx5_ib_create_qp *ucmd)
{
+ struct mlx5_general_caps *gen;
int desc_sz = 1 << qp->sq.wqe_shift;
- if (desc_sz > dev->mdev->caps.max_sq_desc_sz) {
+ gen = &dev->mdev->caps.gen;
+ if (desc_sz > gen->max_sq_desc_sz) {
mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
- desc_sz, dev->mdev->caps.max_sq_desc_sz);
+ desc_sz, gen->max_sq_desc_sz);
return -EINVAL;
}
@@ -325,9 +331,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
qp->sq.wqe_cnt = ucmd->sq_wqe_count;
- if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) {
+ if (qp->sq.wqe_cnt > gen->max_wqes) {
mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
- qp->sq.wqe_cnt, dev->mdev->caps.max_wqes);
+ qp->sq.wqe_cnt, gen->max_wqes);
return -EINVAL;
}
@@ -803,16 +809,18 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_resources *devr = &dev->devr;
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
+ struct mlx5_general_caps *gen;
struct mlx5_ib_create_qp ucmd;
int inlen = sizeof(*in);
int err;
+ gen = &dev->mdev->caps.gen;
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) {
+ if (!(gen->flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) {
mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
return -EINVAL;
} else {
@@ -851,9 +859,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
mlx5_ib_dbg(dev, "invalid rq params\n");
return -EINVAL;
}
- if (ucmd.sq_wqe_count > dev->mdev->caps.max_wqes) {
+ if (ucmd.sq_wqe_count > gen->max_wqes) {
mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
- ucmd.sq_wqe_count, dev->mdev->caps.max_wqes);
+ ucmd.sq_wqe_count, gen->max_wqes);
return -EINVAL;
}
err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
@@ -1144,6 +1152,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
+ struct mlx5_general_caps *gen;
struct mlx5_ib_dev *dev;
struct mlx5_ib_qp *qp;
u16 xrcdn = 0;
@@ -1161,11 +1170,12 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
}
dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
}
+ gen = &dev->mdev->caps.gen;
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
case IB_QPT_XRC_INI:
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC)) {
+ if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) {
mlx5_ib_dbg(dev, "XRC not supported\n");
return ERR_PTR(-ENOSYS);
}
@@ -1272,6 +1282,9 @@ enum {
static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
{
+ struct mlx5_general_caps *gen;
+
+ gen = &dev->mdev->caps.gen;
if (rate == IB_RATE_PORT_CURRENT) {
return 0;
} else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
@@ -1279,7 +1292,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
} else {
while (rate != IB_RATE_2_5_GBPS &&
!(1 << (rate + MLX5_STAT_RATE_OFFSET) &
- dev->mdev->caps.stat_rate_support))
+ gen->stat_rate_support))
--rate;
}
@@ -1290,8 +1303,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr)
{
+ struct mlx5_general_caps *gen;
int err;
+ gen = &dev->mdev->caps.gen;
path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
@@ -1302,6 +1317,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
path->rlid = cpu_to_be16(ah->dlid);
if (ah->ah_flags & IB_AH_GRH) {
+ if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) {
+ pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n",
+ ah->grh.sgid_index, gen->port[port - 1].gid_table_len);
+ return -EINVAL;
+ }
path->grh_mlid |= 1 << 7;
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
@@ -1317,22 +1337,6 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
path->static_rate = err;
path->port = port;
- if (ah->ah_flags & IB_AH_GRH) {
- if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) {
- pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n",
- ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len);
- return -EINVAL;
- }
-
- path->grh_mlid |= 1 << 7;
- path->mgid_index = ah->grh.sgid_index;
- path->hop_limit = ah->grh.hop_limit;
- path->tclass_flowlabel =
- cpu_to_be32((ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
- }
-
if (attr_mask & IB_QP_TIMEOUT)
path->ackto_lt = attr->timeout << 3;
@@ -1492,6 +1496,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
+ struct mlx5_general_caps *gen;
struct mlx5_modify_qp_mbox_in *in;
struct mlx5_ib_pd *pd;
enum mlx5_qp_state mlx5_cur, mlx5_new;
@@ -1500,6 +1505,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
int mlx5_st;
int err;
+ gen = &dev->mdev->caps.gen;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1539,7 +1545,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
err = -EINVAL;
goto out;
}
- context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev->caps.log_max_msg;
+ context->mtu_msgmax = (attr->path_mtu << 5) | gen->log_max_msg;
}
if (attr_mask & IB_QP_DEST_QPN)
@@ -1685,9 +1691,11 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
+ struct mlx5_general_caps *gen;
int err = -EINVAL;
int port;
+ gen = &dev->mdev->caps.gen;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
@@ -1699,21 +1707,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
if ((attr_mask & IB_QP_PORT) &&
- (attr->port_num == 0 || attr->port_num > dev->mdev->caps.num_ports))
+ (attr->port_num == 0 || attr->port_num > gen->num_ports))
goto out;
if (attr_mask & IB_QP_PKEY_INDEX) {
port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- if (attr->pkey_index >= dev->mdev->caps.port[port - 1].pkey_table_len)
+ if (attr->pkey_index >= gen->port[port - 1].pkey_table_len)
goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
- attr->max_rd_atomic > dev->mdev->caps.max_ra_res_qp)
+ attr->max_rd_atomic > (1 << gen->log_max_ra_res_qp))
goto out;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
- attr->max_dest_rd_atomic > dev->mdev->caps.max_ra_req_qp)
+ attr->max_dest_rd_atomic > (1 << gen->log_max_ra_req_qp))
goto out;
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
@@ -2020,56 +2028,31 @@ static u8 bs_selector(int block_size)
}
}
-static int format_selector(struct ib_sig_attrs *attr,
- struct ib_sig_domain *domain,
- int *selector)
+static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
+ struct mlx5_bsf_inl *inl)
{
+ /* Valid inline section and allow BSF refresh */
+ inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
+ MLX5_BSF_REFRESH_DIF);
+ inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
+ inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
+ /* repeating block */
+ inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
+ inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
+ MLX5_DIF_CRC : MLX5_DIF_IPCS;
-#define FORMAT_DIF_NONE 0
-#define FORMAT_DIF_CRC_INC 8
-#define FORMAT_DIF_CRC_NO_INC 12
-#define FORMAT_DIF_CSUM_INC 13
-#define FORMAT_DIF_CSUM_NO_INC 14
+ if (domain->sig.dif.ref_remap)
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
- switch (domain->sig.dif.type) {
- case IB_T10DIF_NONE:
- /* No DIF */
- *selector = FORMAT_DIF_NONE;
- break;
- case IB_T10DIF_TYPE1: /* Fall through */
- case IB_T10DIF_TYPE2:
- switch (domain->sig.dif.bg_type) {
- case IB_T10DIF_CRC:
- *selector = FORMAT_DIF_CRC_INC;
- break;
- case IB_T10DIF_CSUM:
- *selector = FORMAT_DIF_CSUM_INC;
- break;
- default:
- return 1;
- }
- break;
- case IB_T10DIF_TYPE3:
- switch (domain->sig.dif.bg_type) {
- case IB_T10DIF_CRC:
- *selector = domain->sig.dif.type3_inc_reftag ?
- FORMAT_DIF_CRC_INC :
- FORMAT_DIF_CRC_NO_INC;
- break;
- case IB_T10DIF_CSUM:
- *selector = domain->sig.dif.type3_inc_reftag ?
- FORMAT_DIF_CSUM_INC :
- FORMAT_DIF_CSUM_NO_INC;
- break;
- default:
- return 1;
- }
- break;
- default:
- return 1;
+ if (domain->sig.dif.app_escape) {
+ if (domain->sig.dif.ref_escape)
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
+ else
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
}
- return 0;
+ inl->dif_app_bitmask_check =
+ cpu_to_be16(domain->sig.dif.apptag_check_mask);
}
static int mlx5_set_bsf(struct ib_mr *sig_mr,
@@ -2080,45 +2063,49 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
struct mlx5_bsf_basic *basic = &bsf->basic;
struct ib_sig_domain *mem = &sig_attrs->mem;
struct ib_sig_domain *wire = &sig_attrs->wire;
- int ret, selector;
memset(bsf, 0, sizeof(*bsf));
+
+ /* Basic + Extended + Inline */
+ basic->bsf_size_sbs = 1 << 7;
+ /* Input domain check byte mask */
+ basic->check_byte_mask = sig_attrs->check_mask;
+ basic->raw_data_size = cpu_to_be32(data_size);
+
+ /* Memory domain */
switch (sig_attrs->mem.sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
case IB_SIG_TYPE_T10_DIF:
- if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
- return -EINVAL;
+ basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
+ basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
+ mlx5_fill_inl_bsf(mem, &bsf->m_inl);
+ break;
+ default:
+ return -EINVAL;
+ }
- /* Input domain check byte mask */
- basic->check_byte_mask = sig_attrs->check_mask;
+ /* Wire domain */
+ switch (sig_attrs->wire.sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
+ case IB_SIG_TYPE_T10_DIF:
if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
- mem->sig.dif.type == wire->sig.dif.type) {
+ mem->sig_type == wire->sig_type) {
/* Same block structure */
- basic->bsf_size_sbs = 1 << 4;
+ basic->bsf_size_sbs |= 1 << 4;
if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
- basic->wire.copy_byte_mask |= 0xc0;
+ basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
- basic->wire.copy_byte_mask |= 0x30;
+ basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
- basic->wire.copy_byte_mask |= 0x0f;
+ basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
} else
basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
- basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
- basic->raw_data_size = cpu_to_be32(data_size);
-
- ret = format_selector(sig_attrs, mem, &selector);
- if (ret)
- return -EINVAL;
- basic->m_bfs_psv = cpu_to_be32(selector << 24 |
- msig->psv_memory.psv_idx);
-
- ret = format_selector(sig_attrs, wire, &selector);
- if (ret)
- return -EINVAL;
- basic->w_bfs_psv = cpu_to_be32(selector << 24 |
- msig->psv_wire.psv_idx);
+ basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
+ mlx5_fill_inl_bsf(wire, &bsf->w_inl);
break;
-
default:
return -EINVAL;
}
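Note: with the memory and wire domains now handled by symmetric switches, a consumer can protect just one side of the transfer. A minimal caller-side sketch follows (not part of this patch; the struct, field, and enum names are those used in the hunks above, while the helper name and constants are illustrative):

    #include <linux/string.h>
    #include <rdma/ib_verbs.h>

    /* Wire-only Type-1 T10-DIF: no PI in memory, CRC guard block on the wire. */
    static void fill_wire_only_dif(struct ib_sig_attrs *sig_attrs, u32 lba)
    {
        memset(sig_attrs, 0, sizeof(*sig_attrs));
        sig_attrs->check_mask = 0xff;                   /* check all 8 PI bytes */
        sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;     /* memory domain unprotected */
        sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
        sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
        sig_attrs->wire.sig.dif.pi_interval = 512;      /* PI per 512-byte block */
        sig_attrs->wire.sig.dif.ref_tag = lba;          /* Type 1 seeds ref tag with LBA */
        sig_attrs->wire.sig.dif.ref_remap = true;       /* increment ref tag per block */
        sig_attrs->wire.sig.dif.apptag_check_mask = 0xffff;
    }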
@@ -2317,20 +2304,21 @@ static int set_psv_wr(struct ib_sig_domain *domain,
memset(psv_seg, 0, sizeof(*psv_seg));
psv_seg->psv_num = cpu_to_be32(psv_idx);
switch (domain->sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
case IB_SIG_TYPE_T10_DIF:
psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
domain->sig.dif.app_tag);
psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
-
- *seg += sizeof(*psv_seg);
- *size += sizeof(*psv_seg) / 16;
break;
-
default:
pr_err("Bad signature type given.\n");
return 1;
}
+ *seg += sizeof(*psv_seg);
+ *size += sizeof(*psv_seg) / 16;
+
return 0;
}
@@ -2501,7 +2489,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qp->sq.lock, flags);
for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) {
+ if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
mlx5_ib_warn(dev, "\n");
err = -EINVAL;
*bad_wr = wr;
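ARRAY_SIZE() is more than shorthand for the open-coded division it replaces. Its definition in <linux/kernel.h> is, sketched:

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))

where __must_be_array() forces a compile error if the argument has decayed to a pointer, so the bounds check above cannot silently turn into sizeof(ptr)/sizeof(elem).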
@@ -2893,7 +2881,8 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
ib_ah_attr->port_num = path->port;
- if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+ if (ib_ah_attr->port_num == 0 ||
+ ib_ah_attr->port_num > dev->caps.gen.num_ports)
return;
ib_ah_attr->sl = path->sl & 0xf;
@@ -3011,10 +3000,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_general_caps *gen;
struct mlx5_ib_xrcd *xrcd;
int err;
- if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC))
+ gen = &dev->mdev->caps.gen;
+ if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC))
return ERR_PTR(-ENOSYS);
xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 70bd131ba646..97cc1baaa8e3 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -238,6 +238,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_general_caps *gen;
struct mlx5_ib_srq *srq;
int desc_size;
int buf_size;
@@ -247,11 +248,12 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
int is_xrc;
u32 flgs, xrcdn;
+ gen = &dev->mdev->caps.gen;
/* Sanity check SRQ size before proceeding */
- if (init_attr->attr.max_wr >= dev->mdev->caps.max_srq_wqes) {
+ if (init_attr->attr.max_wr >= gen->max_srq_wqes) {
mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
init_attr->attr.max_wr,
- dev->mdev->caps.max_srq_wqes);
+ gen->max_srq_wqes);
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index b6f7f457fc55..8881fa376e06 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -294,7 +294,7 @@ int mthca_create_agents(struct mthca_dev *dev)
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
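The extra trailing 0 here is the registration_flags argument that ib_register_mad_agent() grew in this merge window; passing 0 keeps the previous behavior. A reference sketch of the updated prototype, as implied by the converted callers in this tree:

    struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
                                               u8 port_num,
                                               enum ib_qp_type qp_type,
                                               struct ib_mad_reg_req *mad_reg_req,
                                               u8 rmpp_version,
                                               ib_mad_send_handler send_handler,
                                               ib_mad_recv_handler recv_handler,
                                               void *context,
                                               u32 registration_flags);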
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 90200245c5eb..02120d340d50 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1003,13 +1003,13 @@ int nes_init_cqp(struct nes_device *nesdev)
(sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) +
sizeof(struct nes_hw_cqp_qp_context);
- nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
- &nesdev->cqp_pbase);
+ nesdev->cqp_vbase = pci_zalloc_consistent(nesdev->pcidev,
+ nesdev->cqp_mem_size,
+ &nesdev->cqp_pbase);
if (!nesdev->cqp_vbase) {
nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n");
return -ENOMEM;
}
- memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size);
/* Allocate a twice the number of CQP requests as the SQ size */
nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
@@ -1691,13 +1691,13 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
(NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) +
sizeof(struct nes_hw_nic_qp_context);
- nesvnic->nic_vbase = pci_alloc_consistent(nesdev->pcidev, nesvnic->nic_mem_size,
- &nesvnic->nic_pbase);
+ nesvnic->nic_vbase = pci_zalloc_consistent(nesdev->pcidev,
+ nesvnic->nic_mem_size,
+ &nesvnic->nic_pbase);
if (!nesvnic->nic_vbase) {
nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n");
return -ENOMEM;
}
- memset(nesvnic->nic_vbase, 0, nesvnic->nic_mem_size);
nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n",
nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size);
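pci_zalloc_consistent() folds the allocate-then-memset pattern removed in these hunks into a single call. Roughly, per the compat wrapper this helper was introduced as:

    static inline void *pci_zalloc_consistent(struct pci_dev *hwdev, size_t size,
                                              dma_addr_t *dma_handle)
    {
        /* Zeroing variant of pci_alloc_consistent(); GFP_ATOMIC as before. */
        return dma_zalloc_coherent(hwdev ? &hwdev->dev : NULL,
                                   size, dma_handle, GFP_ATOMIC);
    }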
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 218dd3574285..fef067c959fc 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1616,8 +1616,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
entries, nescq->cq_mem_size, nescq->hw_cq.cq_number);
/* allocate the physical buffer space */
- mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
- &nescq->hw_cq.cq_pbase);
+ mem = pci_zalloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
+ &nescq->hw_cq.cq_pbase);
if (!mem) {
printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n");
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
@@ -1625,7 +1625,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
return ERR_PTR(-ENOMEM);
}
- memset(mem, 0, nescq->cq_mem_size);
nescq->hw_cq.cq_vbase = mem;
nescq->hw_cq.cq_head = 0;
nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n",
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 19011dbb930f..b43456ae124b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -40,7 +40,7 @@
#include <be_roce.h>
#include "ocrdma_sli.h"
-#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u"
+#define OCRDMA_ROCE_DRV_VERSION "10.2.287.0u"
#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -137,6 +137,7 @@ struct mqe_ctx {
u16 cqe_status;
u16 ext_status;
bool cmd_done;
+ bool fw_error_state;
};
struct ocrdma_hw_mr {
@@ -235,7 +236,10 @@ struct ocrdma_dev {
struct list_head entry;
struct rcu_head rcu;
int id;
- u64 stag_arr[OCRDMA_MAX_STAG];
+ u64 *stag_arr;
+ u8 sl; /* service level */
+ bool pfc_state;
+ atomic_t update_sl;
u16 pvid;
u32 asic_id;
@@ -518,4 +522,22 @@ static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
}
+static inline u8 ocrdma_get_pfc_prio(u8 *pfc, u8 prio)
+{
+ return *(pfc + prio);
+}
+
+static inline u8 ocrdma_get_app_prio(u8 *app_prio, u8 prio)
+{
+ return *(app_prio + prio);
+}
+
+static inline u8 ocrdma_is_enabled_and_synced(u32 state)
+{ /* May also be used to interpret TC-state, QCN-state
+ * Appl-state and Logical-link-state in future.
+ */
+ return (state & OCRDMA_STATE_FLAG_ENABLED) &&
+ (state & OCRDMA_STATE_FLAG_SYNC);
+}
+
#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index d4cc01f10c01..ac02ce4e8040 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -35,8 +35,10 @@
#include "ocrdma_ah.h"
#include "ocrdma_hw.h"
+#define OCRDMA_VID_PCP_SHIFT 0xD
+
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
- struct ib_ah_attr *attr, int pdid)
+ struct ib_ah_attr *attr, union ib_gid *sgid, int pdid)
{
int status = 0;
u16 vlan_tag; bool vlan_enabled = false;
@@ -47,15 +49,14 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
memset(&eth, 0, sizeof(eth));
memset(&grh, 0, sizeof(grh));
- ah->sgid_index = attr->grh.sgid_index;
-
+ /* VLAN */
vlan_tag = attr->vlan_id;
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
if (vlan_tag && (vlan_tag < 0x1000)) {
eth.eth_type = cpu_to_be16(0x8100);
eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
- vlan_tag |= (attr->sl & 7) << 13;
+ vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT;
eth.vlan_tag = cpu_to_be16(vlan_tag);
eth_sz = sizeof(struct ocrdma_eth_vlan);
vlan_enabled = true;
@@ -63,15 +64,14 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
eth_sz = sizeof(struct ocrdma_eth_basic);
}
+ /* MAC */
memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
- memcpy(&eth.dmac[0], attr->dmac, ETH_ALEN);
status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
if (status)
return status;
- status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index,
- (union ib_gid *)&grh.sgid[0]);
- if (status)
- return status;
+ ah->sgid_index = attr->grh.sgid_index;
+ memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
+ memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
grh.tclass_flow = cpu_to_be32((6 << 28) |
(attr->grh.traffic_class << 24) |
@@ -79,8 +79,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
/* 0x1b is next header value in GRH */
grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
(0x1b << 8) | attr->grh.hop_limit);
-
- memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
+ /* Eth HDR */
memcpy(&ah->av->eth_hdr, &eth, eth_sz);
memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
if (vlan_enabled)
@@ -96,10 +95,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
struct ocrdma_ah *ah;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ union ib_gid sgid;
+ u8 zmac[ETH_ALEN];
if (!(attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
+ if (atomic_cmpxchg(&dev->update_sl, 1, 0))
+ ocrdma_init_service_level(dev);
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
@@ -107,7 +110,27 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
status = ocrdma_alloc_av(dev, ah);
if (status)
goto av_err;
- status = set_av_attr(dev, ah, attr, pd->id);
+
+ status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid);
+ if (status) {
+ pr_err("%s(): Failed to query sgid, status = %d\n",
+ __func__, status);
+ goto av_conf_err;
+ }
+
+ memset(&zmac, 0, ETH_ALEN);
+ if (pd->uctx &&
+ memcmp(attr->dmac, &zmac, ETH_ALEN)) {
+ status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
+ attr->dmac, &attr->vlan_id);
+ if (status) {
+ pr_err("%s(): Failed to resolve dmac from gid."
+ "status = %d\n", __func__, status);
+ goto av_conf_err;
+ }
+ }
+
+ status = set_av_attr(dev, ah, attr, &sgid, pd->id);
if (status)
goto av_conf_err;
@@ -141,7 +164,7 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
struct ocrdma_av *av = ah->av;
struct ocrdma_grh *grh;
attr->ah_flags |= IB_AH_GRH;
- if (ah->av->valid & Bit(1)) {
+ if (ah->av->valid & OCRDMA_AV_VALID) {
grh = (struct ocrdma_grh *)((u8 *)ah->av +
sizeof(struct ocrdma_eth_vlan));
attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 3bbf2010a821..638bff1ffc6c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -348,11 +348,6 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
return mqe;
}
-static void *ocrdma_alloc_mqe(void)
-{
- return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
-}
-
static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
{
dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
@@ -525,7 +520,7 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
cmd->eqn = eq->id;
- cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe);
+ cmd->pdid_cqecnt = cq->size / sizeof(struct ocrdma_mcqe);
ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE,
cq->dma, PAGE_SIZE_4K);
@@ -566,8 +561,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
- cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE);
- cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE);
+ cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
+ cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
cmd->async_cqid_ringsize = cq->id;
cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -661,7 +656,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
{
struct ocrdma_qp *qp = NULL;
struct ocrdma_cq *cq = NULL;
- struct ib_event ib_evt = { 0 };
+ struct ib_event ib_evt;
int cq_event = 0;
int qp_event = 1;
int srq_event = 0;
@@ -674,6 +669,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
+ memset(&ib_evt, 0, sizeof(ib_evt));
+
ib_evt.device = &dev->ibdev;
switch (type) {
@@ -771,6 +768,10 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
OCRDMA_AE_PVID_MCQE_TAG_MASK) >>
OCRDMA_AE_PVID_MCQE_TAG_SHIFT);
break;
+
+ case OCRDMA_ASYNC_EVENT_COS_VALUE:
+ atomic_set(&dev->update_sl, 1);
+ break;
default:
/* Not interested evts. */
break;
@@ -962,8 +963,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
msecs_to_jiffies(30000));
if (status)
return 0;
- else
+ else {
+ dev->mqe_ctx.fw_error_state = true;
+ pr_err("%s(%d) mailbox timeout: fw not responding\n",
+ __func__, dev->id);
return -1;
+ }
}
/* issue a mailbox command on the MQ */
@@ -975,6 +980,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
struct ocrdma_mbx_rsp *rsp = NULL;
mutex_lock(&dev->mqe_ctx.lock);
+ if (dev->mqe_ctx.fw_error_state)
+ goto mbx_err;
ocrdma_post_mqe(dev, mqe);
status = ocrdma_wait_mqe_cmpl(dev);
if (status)
@@ -1078,7 +1085,8 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
attr->max_mw = rsp->max_mw;
attr->max_mr = rsp->max_mr;
- attr->max_mr_size = ~0ull;
+ attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) |
+ rsp->max_mr_size_lo;
attr->max_fmr = 0;
attr->max_pages_per_frmr = rsp->max_pages_per_frmr;
attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
@@ -1176,10 +1184,10 @@ int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset)
{
struct ocrdma_rdma_stats_req *req = dev->stats_mem.va;
struct ocrdma_mqe *mqe = &dev->stats_mem.mqe;
- struct ocrdma_rdma_stats_resp *old_stats = NULL;
+ struct ocrdma_rdma_stats_resp *old_stats;
int status;
- old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL);
+ old_stats = kmalloc(sizeof(*old_stats), GFP_KERNEL);
if (old_stats == NULL)
return -ENOMEM;
@@ -1222,10 +1230,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp;
struct mgmt_hba_attribs *hba_attribs;
- mqe = ocrdma_alloc_mqe();
+ mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
if (!mqe)
return status;
- memset(mqe, 0, sizeof(*mqe));
dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp);
dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev,
@@ -1252,7 +1259,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
- dev->hba_port_num = hba_attribs->phy_port;
+ dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
+ OCRDMA_HBA_ATTRB_PTNUM_MASK)
+ >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
strncpy(dev->model_number,
hba_attribs->controller_model_number, 31);
}
@@ -1302,7 +1311,8 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
goto mbx_err;
rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
- *lnk_speed = rsp->phys_port_speed;
+ *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
+ >> OCRDMA_PHY_PS_SHIFT;
mbx_err:
kfree(cmd);
@@ -1328,11 +1338,16 @@ static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
goto mbx_err;
rsp = (struct ocrdma_get_phy_info_rsp *)cmd;
- dev->phy.phy_type = le16_to_cpu(rsp->phy_type);
+ dev->phy.phy_type =
+ (rsp->ityp_ptyp & OCRDMA_PHY_TYPE_MASK);
+ dev->phy.interface_type =
+ (rsp->ityp_ptyp & OCRDMA_IF_TYPE_MASK)
+ >> OCRDMA_IF_TYPE_SHIFT;
dev->phy.auto_speeds_supported =
- le16_to_cpu(rsp->auto_speeds_supported);
+ (rsp->fspeed_aspeed & OCRDMA_ASPEED_SUPP_MASK);
dev->phy.fixed_speeds_supported =
- le16_to_cpu(rsp->fixed_speeds_supported);
+ (rsp->fspeed_aspeed & OCRDMA_FSPEED_SUPP_MASK)
+ >> OCRDMA_FSPEED_SUPP_SHIFT;
mbx_err:
kfree(cmd);
return status;
@@ -1457,8 +1472,8 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
- pbes[i].pa_lo = (u32) (pa & 0xffffffff);
- pbes[i].pa_hi = (u32) upper_32_bits(pa);
+ pbes[i].pa_lo = (u32)cpu_to_le32(pa & 0xffffffff);
+ pbes[i].pa_hi = (u32)cpu_to_le32(upper_32_bits(pa));
pa += PAGE_SIZE;
}
cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF);
@@ -1501,6 +1516,7 @@ static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev)
ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
dev->av_tbl.pa);
+ dev->av_tbl.va = NULL;
dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
dev->av_tbl.pbl.pa);
kfree(cmd);
@@ -1624,14 +1640,16 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
OCRDMA_CREATE_CQ_TYPE_SHIFT;
cq->phase_change = false;
- cmd->cmd.cqe_count = (cq->len / cqe_size);
+ cmd->cmd.pdid_cqecnt = (cq->len / cqe_size);
} else {
- cmd->cmd.cqe_count = (cq->len / cqe_size) - 1;
+ cmd->cmd.pdid_cqecnt = (cq->len / cqe_size) - 1;
cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID;
cq->phase_change = true;
}
- cmd->cmd.pd_id = pd_id; /* valid only for v3 */
+ /* pd_id valid only for v3 */
+ cmd->cmd.pdid_cqecnt |= (pd_id <<
+ OCRDMA_CREATE_CQ_CMD_PDID_SHIFT);
ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
if (status)
@@ -2206,7 +2224,8 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
qp->rq_cq = cq;
- if (pd->dpp_enabled && pd->num_dpp_qp) {
+ if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
+ (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
dpp_cq_id);
}
@@ -2254,7 +2273,8 @@ mbx_err:
static int ocrdma_set_av_params(struct ocrdma_qp *qp,
struct ocrdma_modify_qp *cmd,
- struct ib_qp_attr *attrs)
+ struct ib_qp_attr *attrs,
+ int attr_mask)
{
int status;
struct ib_ah_attr *ah_attr = &attrs->ah_attr;
@@ -2264,6 +2284,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
return -EINVAL;
+ if (atomic_cmpxchg(&qp->dev->update_sl, 1, 0))
+ ocrdma_init_service_level(qp->dev);
cmd->params.tclass_sq_psn |=
(ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
cmd->params.rnt_rc_sl_fl |=
@@ -2292,11 +2314,13 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
- vlan_id = ah_attr->vlan_id;
- if (vlan_id && (vlan_id < 0x1000)) {
+ if (attr_mask & IB_QP_VID) {
+ vlan_id = attrs->vlan_id;
cmd->params.vlan_dmac_b4_to_b5 |=
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
+ cmd->params.rnt_rc_sl_fl |=
+ (qp->dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT;
}
return 0;
}
@@ -2318,7 +2342,7 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID;
}
if (attr_mask & IB_QP_AV) {
- status = ocrdma_set_av_params(qp, cmd, attrs);
+ status = ocrdma_set_av_params(qp, cmd, attrs, attr_mask);
if (status)
return status;
} else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
@@ -2604,6 +2628,168 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
return status;
}
+static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
+ struct ocrdma_dcbx_cfg *dcbxcfg)
+{
+ int status = 0;
+ dma_addr_t pa;
+ struct ocrdma_mqe cmd;
+
+ struct ocrdma_get_dcbx_cfg_req *req = NULL;
+ struct ocrdma_get_dcbx_cfg_rsp *rsp = NULL;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ struct ocrdma_mqe_sge *mqe_sge = cmd.u.nonemb_req.sge;
+
+ memset(&cmd, 0, sizeof(struct ocrdma_mqe));
+ cmd.hdr.pyld_len = max_t(u32, sizeof(struct ocrdma_get_dcbx_cfg_rsp),
+ sizeof(struct ocrdma_get_dcbx_cfg_req));
+ req = dma_alloc_coherent(&pdev->dev, cmd.hdr.pyld_len, &pa, GFP_KERNEL);
+ if (!req) {
+ status = -ENOMEM;
+ goto mem_err;
+ }
+
+ cmd.hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+ OCRDMA_MQE_HDR_SGE_CNT_MASK;
+ mqe_sge->pa_lo = (u32) (pa & 0xFFFFFFFFUL);
+ mqe_sge->pa_hi = (u32) upper_32_bits(pa);
+ mqe_sge->len = cmd.hdr.pyld_len;
+
+ memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req));
+ ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG,
+ OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len);
+ req->param_type = ptype;
+
+ status = ocrdma_mbx_cmd(dev, &cmd);
+ if (status)
+ goto mbx_err;
+
+ rsp = (struct ocrdma_get_dcbx_cfg_rsp *)req;
+ ocrdma_le32_to_cpu(rsp, sizeof(struct ocrdma_get_dcbx_cfg_rsp));
+ memcpy(dcbxcfg, &rsp->cfg, sizeof(struct ocrdma_dcbx_cfg));
+
+mbx_err:
+ dma_free_coherent(&pdev->dev, cmd.hdr.pyld_len, req, pa);
+mem_err:
+ return status;
+}
+
+#define OCRDMA_MAX_SERVICE_LEVEL_INDEX 0x08
+#define OCRDMA_DEFAULT_SERVICE_LEVEL 0x05
+
+static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype,
+ struct ocrdma_dcbx_cfg *dcbxcfg,
+ u8 *srvc_lvl)
+{
+ int status = -EINVAL, indx, slindx;
+ int ventry_cnt;
+ struct ocrdma_app_parameter *app_param;
+ u8 valid, proto_sel;
+ u8 app_prio, pfc_prio;
+ u16 proto;
+
+ if (!(dcbxcfg->tcv_aev_opv_st & OCRDMA_DCBX_STATE_MASK)) {
+ pr_info("%s ocrdma%d DCBX is disabled\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ goto out;
+ }
+
+ if (!ocrdma_is_enabled_and_synced(dcbxcfg->pfc_state)) {
+ pr_info("%s ocrdma%d priority flow control(%s) is %s%s\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id,
+ (ptype > 0 ? "operational" : "admin"),
+ (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_ENABLED) ?
+ "enabled" : "disabled",
+ (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_SYNC) ?
+ "" : ", not sync'ed");
+ goto out;
+ } else {
+ pr_info("%s ocrdma%d priority flow control is enabled and sync'ed\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ }
+
+ ventry_cnt = (dcbxcfg->tcv_aev_opv_st >>
+ OCRDMA_DCBX_APP_ENTRY_SHIFT)
+ & OCRDMA_DCBX_STATE_MASK;
+
+ for (indx = 0; indx < ventry_cnt; indx++) {
+ app_param = &dcbxcfg->app_param[indx];
+ valid = (app_param->valid_proto_app >>
+ OCRDMA_APP_PARAM_VALID_SHIFT)
+ & OCRDMA_APP_PARAM_VALID_MASK;
+ proto_sel = (app_param->valid_proto_app
+ >> OCRDMA_APP_PARAM_PROTO_SEL_SHIFT)
+ & OCRDMA_APP_PARAM_PROTO_SEL_MASK;
+ proto = app_param->valid_proto_app &
+ OCRDMA_APP_PARAM_APP_PROTO_MASK;
+
+ if (valid && proto == OCRDMA_APP_PROTO_ROCE &&
+ proto_sel == OCRDMA_PROTO_SELECT_L2) {
+ for (slindx = 0; slindx <
+ OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) {
+ app_prio = ocrdma_get_app_prio(
+ (u8 *)app_param->app_prio,
+ slindx);
+ pfc_prio = ocrdma_get_pfc_prio(
+ (u8 *)dcbxcfg->pfc_prio,
+ slindx);
+
+ if (app_prio && pfc_prio) {
+ *srvc_lvl = slindx;
+ status = 0;
+ goto out;
+ }
+ }
+ if (slindx == OCRDMA_MAX_SERVICE_LEVEL_INDEX) {
+ pr_info("%s ocrdma%d application priority not set for 0x%x protocol\n",
+ dev_name(&dev->nic_info.pdev->dev),
+ dev->id, proto);
+ }
+ }
+ }
+
+out:
+ return status;
+}
+
+void ocrdma_init_service_level(struct ocrdma_dev *dev)
+{
+ int status = 0, indx;
+ struct ocrdma_dcbx_cfg dcbxcfg;
+ u8 srvc_lvl = OCRDMA_DEFAULT_SERVICE_LEVEL;
+ int ptype = OCRDMA_PARAMETER_TYPE_OPER;
+
+ for (indx = 0; indx < 2; indx++) {
+ status = ocrdma_mbx_get_dcbx_config(dev, ptype, &dcbxcfg);
+ if (status) {
+ pr_err("%s(): status=%d\n", __func__, status);
+ ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
+ continue;
+ }
+
+ status = ocrdma_parse_dcbxcfg_rsp(dev, ptype,
+ &dcbxcfg, &srvc_lvl);
+ if (status) {
+ ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
+ continue;
+ }
+
+ break;
+ }
+
+ if (status)
+ pr_info("%s ocrdma%d service level default\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ else
+ pr_info("%s ocrdma%d service level %d\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id,
+ srvc_lvl);
+
+ dev->pfc_state = ocrdma_is_enabled_and_synced(dcbxcfg.pfc_state);
+ dev->sl = srvc_lvl;
+}
+
int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
{
int i;
@@ -2709,13 +2895,15 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
goto conf_err;
status = ocrdma_mbx_get_phy_info(dev);
if (status)
- goto conf_err;
+ goto info_attrb_err;
status = ocrdma_mbx_get_ctrl_attribs(dev);
if (status)
- goto conf_err;
+ goto info_attrb_err;
return 0;
+info_attrb_err:
+ ocrdma_mbx_delete_ah_tbl(dev);
conf_err:
ocrdma_destroy_mq(dev);
mq_err:
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index e513f7293142..6eed8f191322 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -135,4 +135,6 @@ int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
char *port_speed_string(struct ocrdma_dev *dev);
+void ocrdma_init_service_level(struct ocrdma_dev *);
+
#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7c504e079744..b0b2257b8e04 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -324,6 +324,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
if (!dev->qp_tbl)
goto alloc_err;
}
+
+ dev->stag_arr = kzalloc(sizeof(u64) * OCRDMA_MAX_STAG, GFP_KERNEL);
+ if (dev->stag_arr == NULL)
+ goto alloc_err;
+
spin_lock_init(&dev->av_tbl.lock);
spin_lock_init(&dev->flush_q_lock);
return 0;
@@ -334,6 +339,7 @@ alloc_err:
static void ocrdma_free_resources(struct ocrdma_dev *dev)
{
+ kfree(dev->stag_arr);
kfree(dev->qp_tbl);
kfree(dev->cq_tbl);
kfree(dev->sgid_tbl);
@@ -353,15 +359,25 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
{
struct ocrdma_dev *dev = dev_get_drvdata(device);
- return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]);
+ return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
+}
+
+static ssize_t show_hca_type(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
static struct device_attribute *ocrdma_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type
};
static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
@@ -372,6 +388,68 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
}
+static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
+{
+ /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
+ union ib_gid *sgid = &dev->sgid_tbl[0];
+
+ sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ ocrdma_get_guid(dev, &sgid->raw[8]);
+}
+
+static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev,
+ struct net_device *net)
+{
+ struct in_device *in_dev;
+ union ib_gid gid;
+ in_dev = in_dev_get(net);
+ if (in_dev) {
+ for_ifa(in_dev) {
+ ipv6_addr_set_v4mapped(ifa->ifa_address,
+ (struct in6_addr *)&gid);
+ ocrdma_add_sgid(dev, &gid);
+ }
+ endfor_ifa(in_dev);
+ in_dev_put(in_dev);
+ }
+}
+
+static void ocrdma_init_ipv6_gids(struct ocrdma_dev *dev,
+ struct net_device *net)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev;
+ union ib_gid *pgid;
+ struct inet6_ifaddr *ifp;
+ in6_dev = in6_dev_get(net);
+ if (in6_dev) {
+ read_lock_bh(&in6_dev->lock);
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+ pgid = (union ib_gid *)&ifp->addr;
+ ocrdma_add_sgid(dev, pgid);
+ }
+ read_unlock_bh(&in6_dev->lock);
+ in6_dev_put(in6_dev);
+ }
+#endif
+}
+
+static void ocrdma_init_gid_table(struct ocrdma_dev *dev)
+{
+ struct net_device *net_dev;
+
+ for_each_netdev(&init_net, net_dev) {
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(net_dev) ?
+ rdma_vlan_dev_real_dev(net_dev) : net_dev;
+
+ if (real_dev == dev->nic_info.netdev) {
+ ocrdma_add_default_sgid(dev);
+ ocrdma_init_ipv4_gids(dev, net_dev);
+ ocrdma_init_ipv6_gids(dev, net_dev);
+ }
+ }
+}
+
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
int status = 0, i;
@@ -399,6 +477,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (status)
goto alloc_err;
+ ocrdma_init_service_level(dev);
+ ocrdma_init_gid_table(dev);
status = ocrdma_register_device(dev);
if (status)
goto alloc_err;
@@ -508,6 +588,12 @@ static int ocrdma_close(struct ocrdma_dev *dev)
return 0;
}
+static void ocrdma_shutdown(struct ocrdma_dev *dev)
+{
+ ocrdma_close(dev);
+ ocrdma_remove(dev);
+}
+
/* event handling via NIC driver ensures that all the NIC specific
* initialization done before RoCE driver notifies
* event to stack.
@@ -521,6 +607,9 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
case BE_DEV_DOWN:
ocrdma_close(dev);
break;
+ case BE_DEV_SHUTDOWN:
+ ocrdma_shutdown(dev);
+ break;
}
}
@@ -567,8 +656,10 @@ static int __init ocrdma_init_module(void)
return 0;
err_be_reg:
+#if IS_ENABLED(CONFIG_IPV6)
ocrdma_unregister_inet6addr_notifier();
err_notifier6:
+#endif
ocrdma_unregister_inetaddr_notifier();
return status;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 96c9ee602ba4..4e036480c1a8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -28,8 +28,6 @@
#ifndef __OCRDMA_SLI_H__
#define __OCRDMA_SLI_H__
-#define Bit(_b) (1 << (_b))
-
enum {
OCRDMA_ASIC_GEN_SKH_R = 0x04,
OCRDMA_ASIC_GEN_LANCER = 0x0B
@@ -44,35 +42,39 @@ enum {
#define OCRDMA_SUBSYS_ROCE 10
enum {
OCRDMA_CMD_QUERY_CONFIG = 1,
- OCRDMA_CMD_ALLOC_PD,
- OCRDMA_CMD_DEALLOC_PD,
-
- OCRDMA_CMD_CREATE_AH_TBL,
- OCRDMA_CMD_DELETE_AH_TBL,
-
- OCRDMA_CMD_CREATE_QP,
- OCRDMA_CMD_QUERY_QP,
- OCRDMA_CMD_MODIFY_QP,
- OCRDMA_CMD_DELETE_QP,
-
- OCRDMA_CMD_RSVD1,
- OCRDMA_CMD_ALLOC_LKEY,
- OCRDMA_CMD_DEALLOC_LKEY,
- OCRDMA_CMD_REGISTER_NSMR,
- OCRDMA_CMD_REREGISTER_NSMR,
- OCRDMA_CMD_REGISTER_NSMR_CONT,
- OCRDMA_CMD_QUERY_NSMR,
- OCRDMA_CMD_ALLOC_MW,
- OCRDMA_CMD_QUERY_MW,
-
- OCRDMA_CMD_CREATE_SRQ,
- OCRDMA_CMD_QUERY_SRQ,
- OCRDMA_CMD_MODIFY_SRQ,
- OCRDMA_CMD_DELETE_SRQ,
-
- OCRDMA_CMD_ATTACH_MCAST,
- OCRDMA_CMD_DETACH_MCAST,
- OCRDMA_CMD_GET_RDMA_STATS,
+ OCRDMA_CMD_ALLOC_PD = 2,
+ OCRDMA_CMD_DEALLOC_PD = 3,
+
+ OCRDMA_CMD_CREATE_AH_TBL = 4,
+ OCRDMA_CMD_DELETE_AH_TBL = 5,
+
+ OCRDMA_CMD_CREATE_QP = 6,
+ OCRDMA_CMD_QUERY_QP = 7,
+ OCRDMA_CMD_MODIFY_QP = 8,
+ OCRDMA_CMD_DELETE_QP = 9,
+
+ OCRDMA_CMD_RSVD1 = 10,
+ OCRDMA_CMD_ALLOC_LKEY = 11,
+ OCRDMA_CMD_DEALLOC_LKEY = 12,
+ OCRDMA_CMD_REGISTER_NSMR = 13,
+ OCRDMA_CMD_REREGISTER_NSMR = 14,
+ OCRDMA_CMD_REGISTER_NSMR_CONT = 15,
+ OCRDMA_CMD_QUERY_NSMR = 16,
+ OCRDMA_CMD_ALLOC_MW = 17,
+ OCRDMA_CMD_QUERY_MW = 18,
+
+ OCRDMA_CMD_CREATE_SRQ = 19,
+ OCRDMA_CMD_QUERY_SRQ = 20,
+ OCRDMA_CMD_MODIFY_SRQ = 21,
+ OCRDMA_CMD_DELETE_SRQ = 22,
+
+ OCRDMA_CMD_ATTACH_MCAST = 23,
+ OCRDMA_CMD_DETACH_MCAST = 24,
+
+ OCRDMA_CMD_CREATE_RBQ = 25,
+ OCRDMA_CMD_DESTROY_RBQ = 26,
+
+ OCRDMA_CMD_GET_RDMA_STATS = 27,
OCRDMA_CMD_MAX
};
@@ -99,11 +101,11 @@ enum {
QTYPE_MCCQ = 3
};
-#define OCRDMA_MAX_SGID (8)
+#define OCRDMA_MAX_SGID 8
#define OCRDMA_MAX_QP 2048
#define OCRDMA_MAX_CQ 2048
-#define OCRDMA_MAX_STAG 8192
+#define OCRDMA_MAX_STAG 16384
enum {
OCRDMA_DB_RQ_OFFSET = 0xE0,
@@ -124,33 +126,33 @@ enum {
#define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */
/* qid #2 msbits at 12-11 */
#define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT 0x1
-#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */
+#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT 16 /* bits 16 - 28 */
/* Rearm bit */
-#define OCRDMA_DB_CQ_REARM_SHIFT (29) /* bit 29 */
+#define OCRDMA_DB_CQ_REARM_SHIFT 29 /* bit 29 */
/* solicited bit */
-#define OCRDMA_DB_CQ_SOLICIT_SHIFT (31) /* bit 31 */
+#define OCRDMA_DB_CQ_SOLICIT_SHIFT 31 /* bit 31 */
#define OCRDMA_EQ_ID_MASK 0x1FF /* bits 0 - 8 */
#define OCRDMA_EQ_ID_EXT_MASK 0x3e00 /* bits 9-13 */
-#define OCRDMA_EQ_ID_EXT_MASK_SHIFT (2) /* qid bits 9-13 at 11-15 */
+#define OCRDMA_EQ_ID_EXT_MASK_SHIFT 2 /* qid bits 9-13 at 11-15 */
/* Clear the interrupt for this eq */
-#define OCRDMA_EQ_CLR_SHIFT (9) /* bit 9 */
+#define OCRDMA_EQ_CLR_SHIFT 9 /* bit 9 */
/* Must be 1 */
-#define OCRDMA_EQ_TYPE_SHIFT (10) /* bit 10 */
+#define OCRDMA_EQ_TYPE_SHIFT 10 /* bit 10 */
/* Number of event entries processed */
-#define OCRDMA_NUM_EQE_SHIFT (16) /* bits 16 - 28 */
+#define OCRDMA_NUM_EQE_SHIFT 16 /* bits 16 - 28 */
/* Rearm bit */
-#define OCRDMA_REARM_SHIFT (29) /* bit 29 */
+#define OCRDMA_REARM_SHIFT 29 /* bit 29 */
#define OCRDMA_MQ_ID_MASK 0x7FF /* bits 0 - 10 */
/* Number of entries posted */
-#define OCRDMA_MQ_NUM_MQE_SHIFT (16) /* bits 16 - 29 */
+#define OCRDMA_MQ_NUM_MQE_SHIFT 16 /* bits 16 - 29 */
-#define OCRDMA_MIN_HPAGE_SIZE (4096)
+#define OCRDMA_MIN_HPAGE_SIZE 4096
-#define OCRDMA_MIN_Q_PAGE_SIZE (4096)
-#define OCRDMA_MAX_Q_PAGES (8)
+#define OCRDMA_MIN_Q_PAGE_SIZE 4096
+#define OCRDMA_MAX_Q_PAGES 8
#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C
#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF
@@ -166,14 +168,14 @@ enum {
# 6: 256K Bytes
# 7: 512K Bytes
*/
-#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8)
+#define OCRDMA_MAX_Q_PAGE_SIZE_CNT 8
#define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES)
-#define MAX_OCRDMA_QP_PAGES (8)
+#define MAX_OCRDMA_QP_PAGES 8
#define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HQ_PAGE_SIZE)
-#define OCRDMA_CREATE_CQ_MAX_PAGES (4)
-#define OCRDMA_DPP_CQE_SIZE (4)
+#define OCRDMA_CREATE_CQ_MAX_PAGES 4
+#define OCRDMA_DPP_CQE_SIZE 4
#define OCRDMA_GEN2_MAX_CQE 1024
#define OCRDMA_GEN2_CQ_PAGE_SIZE 4096
@@ -234,7 +236,7 @@ struct ocrdma_mqe_sge {
enum {
OCRDMA_MQE_HDR_EMB_SHIFT = 0,
- OCRDMA_MQE_HDR_EMB_MASK = Bit(0),
+ OCRDMA_MQE_HDR_EMB_MASK = BIT(0),
OCRDMA_MQE_HDR_SGE_CNT_SHIFT = 3,
OCRDMA_MQE_HDR_SGE_CNT_MASK = 0x1F << OCRDMA_MQE_HDR_SGE_CNT_SHIFT,
OCRDMA_MQE_HDR_SPECIAL_SHIFT = 24,
@@ -288,7 +290,7 @@ struct ocrdma_pa {
u32 hi;
};
-#define MAX_OCRDMA_EQ_PAGES (8)
+#define MAX_OCRDMA_EQ_PAGES 8
struct ocrdma_create_eq_req {
struct ocrdma_mbx_hdr req;
u32 num_pages;
@@ -300,7 +302,7 @@ struct ocrdma_create_eq_req {
};
enum {
- OCRDMA_CREATE_EQ_VALID = Bit(29),
+ OCRDMA_CREATE_EQ_VALID = BIT(29),
OCRDMA_CREATE_EQ_CNT_SHIFT = 26,
OCRDMA_CREATE_CQ_DELAY_SHIFT = 13,
};
@@ -310,7 +312,7 @@ struct ocrdma_create_eq_rsp {
u32 vector_eqid;
};
-#define OCRDMA_EQ_MINOR_OTHER (0x1)
+#define OCRDMA_EQ_MINOR_OTHER 0x1
enum {
OCRDMA_MCQE_STATUS_SHIFT = 0,
@@ -318,13 +320,13 @@ enum {
OCRDMA_MCQE_ESTATUS_SHIFT = 16,
OCRDMA_MCQE_ESTATUS_MASK = 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT,
OCRDMA_MCQE_CONS_SHIFT = 27,
- OCRDMA_MCQE_CONS_MASK = Bit(27),
+ OCRDMA_MCQE_CONS_MASK = BIT(27),
OCRDMA_MCQE_CMPL_SHIFT = 28,
- OCRDMA_MCQE_CMPL_MASK = Bit(28),
+ OCRDMA_MCQE_CMPL_MASK = BIT(28),
OCRDMA_MCQE_AE_SHIFT = 30,
- OCRDMA_MCQE_AE_MASK = Bit(30),
+ OCRDMA_MCQE_AE_MASK = BIT(30),
OCRDMA_MCQE_VALID_SHIFT = 31,
- OCRDMA_MCQE_VALID_MASK = Bit(31)
+ OCRDMA_MCQE_VALID_MASK = BIT(31)
};
struct ocrdma_mcqe {
@@ -335,13 +337,13 @@ struct ocrdma_mcqe {
};
enum {
- OCRDMA_AE_MCQE_QPVALID = Bit(31),
+ OCRDMA_AE_MCQE_QPVALID = BIT(31),
OCRDMA_AE_MCQE_QPID_MASK = 0xFFFF,
- OCRDMA_AE_MCQE_CQVALID = Bit(31),
+ OCRDMA_AE_MCQE_CQVALID = BIT(31),
OCRDMA_AE_MCQE_CQID_MASK = 0xFFFF,
- OCRDMA_AE_MCQE_VALID = Bit(31),
- OCRDMA_AE_MCQE_AE = Bit(30),
+ OCRDMA_AE_MCQE_VALID = BIT(31),
+ OCRDMA_AE_MCQE_AE = BIT(30),
OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT = 16,
OCRDMA_AE_MCQE_EVENT_TYPE_MASK =
0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT,
@@ -382,9 +384,9 @@ enum {
OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK = 0xFF <<
OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT,
OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT = 30,
- OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = Bit(30),
+ OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = BIT(30),
OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT = 31,
- OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = Bit(31)
+ OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = BIT(31)
};
struct ocrdma_ae_mpa_mcqe {
@@ -408,9 +410,9 @@ enum {
OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK = 0xFF <<
OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT,
OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT = 30,
- OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = Bit(30),
+ OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = BIT(30),
OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT = 31,
- OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = Bit(31)
+ OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = BIT(31)
};
struct ocrdma_ae_qp_mcqe {
@@ -422,7 +424,12 @@ struct ocrdma_ae_qp_mcqe {
#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
-#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3
+
+enum ocrdma_async_grp5_events {
+ OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01,
+ OCRDMA_ASYNC_EVENT_COS_VALUE = 0x02,
+ OCRDMA_ASYNC_EVENT_PVID_STATE = 0x03
+};
enum OCRDMA_ASYNC_EVENT_TYPE {
OCRDMA_CQ_ERROR = 0x00,
@@ -440,9 +447,9 @@ enum OCRDMA_ASYNC_EVENT_TYPE {
/* mailbox command request and responses */
enum {
OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2,
- OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = Bit(2),
+ OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = BIT(2),
OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT = 3,
- OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = Bit(3),
+ OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = BIT(3),
OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT = 8,
OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK = 0xFFFFFF <<
OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT,
@@ -525,8 +532,8 @@ struct ocrdma_mbx_query_config {
u32 max_ird_ord_per_qp;
u32 max_shared_ird_ord;
u32 max_mr;
- u32 max_mr_size_lo;
u32 max_mr_size_hi;
+ u32 max_mr_size_lo;
u32 max_num_mr_pbl;
u32 max_mw;
u32 max_fmr;
@@ -580,17 +587,26 @@ enum {
OCRDMA_FN_MODE_RDMA = 0x4
};
+enum {
+ OCRDMA_IF_TYPE_MASK = 0xFFFF0000,
+ OCRDMA_IF_TYPE_SHIFT = 0x10,
+ OCRDMA_PHY_TYPE_MASK = 0x0000FFFF,
+ OCRDMA_FUTURE_DETAILS_MASK = 0xFFFF0000,
+ OCRDMA_FUTURE_DETAILS_SHIFT = 0x10,
+ OCRDMA_EX_PHY_DETAILS_MASK = 0x0000FFFF,
+ OCRDMA_FSPEED_SUPP_MASK = 0xFFFF0000,
+ OCRDMA_FSPEED_SUPP_SHIFT = 0x10,
+ OCRDMA_ASPEED_SUPP_MASK = 0x0000FFFF
+};
+
struct ocrdma_get_phy_info_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
- u16 phy_type;
- u16 interface_type;
+ u32 ityp_ptyp;
u32 misc_params;
- u16 ext_phy_details;
- u16 rsvd;
- u16 auto_speeds_supported;
- u16 fixed_speeds_supported;
+ u32 ftrdtl_exphydtl;
+ u32 fspeed_aspeed;
u32 future_use[2];
};
@@ -603,19 +619,34 @@ enum {
OCRDMA_PHY_SPEED_40GBPS = 0x20
};
+enum {
+ OCRDMA_PORT_NUM_MASK = 0x3F,
+ OCRDMA_PT_MASK = 0xC0,
+ OCRDMA_PT_SHIFT = 0x6,
+ OCRDMA_LINK_DUP_MASK = 0x0000FF00,
+ OCRDMA_LINK_DUP_SHIFT = 0x8,
+ OCRDMA_PHY_PS_MASK = 0x00FF0000,
+ OCRDMA_PHY_PS_SHIFT = 0x10,
+ OCRDMA_PHY_PFLT_MASK = 0xFF000000,
+ OCRDMA_PHY_PFLT_SHIFT = 0x18,
+ OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000,
+ OCRDMA_QOS_LNKSP_SHIFT = 0x10,
+ OCRDMA_LLST_MASK = 0xFF,
+ OCRDMA_PLFC_MASK = 0x00000400,
+ OCRDMA_PLFC_SHIFT = 0x8,
+ OCRDMA_PLRFC_MASK = 0x00000200,
+ OCRDMA_PLRFC_SHIFT = 0x8,
+ OCRDMA_PLTFC_MASK = 0x00000100,
+ OCRDMA_PLTFC_SHIFT = 0x8
+};
struct ocrdma_get_link_speed_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
- u8 pt_port_num;
- u8 link_duplex;
- u8 phys_port_speed;
- u8 phys_port_fault;
- u16 rsvd1;
- u16 qos_lnk_speed;
- u8 logical_lnk_status;
- u8 rsvd2[3];
+ u32 pflt_pps_ld_pnum;
+ u32 qos_lsp;
+ u32 res_lls;
};
enum {
@@ -639,9 +670,9 @@ enum {
OCRDMA_CREATE_CQ_PAGE_SIZE_MASK = 0xFF,
OCRDMA_CREATE_CQ_COALESCWM_SHIFT = 12,
- OCRDMA_CREATE_CQ_COALESCWM_MASK = Bit(13) | Bit(12),
- OCRDMA_CREATE_CQ_FLAGS_NODELAY = Bit(14),
- OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = Bit(15),
+ OCRDMA_CREATE_CQ_COALESCWM_MASK = BIT(13) | BIT(12),
+ OCRDMA_CREATE_CQ_FLAGS_NODELAY = BIT(14),
+ OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = BIT(15),
OCRDMA_CREATE_CQ_EQ_ID_MASK = 0xFFFF,
OCRDMA_CREATE_CQ_CQE_COUNT_MASK = 0xFFFF
@@ -654,8 +685,8 @@ enum {
OCRDMA_CREATE_CQ_EQID_SHIFT = 22,
OCRDMA_CREATE_CQ_CNT_SHIFT = 27,
- OCRDMA_CREATE_CQ_FLAGS_VALID = Bit(29),
- OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = Bit(31),
+ OCRDMA_CREATE_CQ_FLAGS_VALID = BIT(29),
+ OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = BIT(31),
OCRDMA_CREATE_CQ_DEF_FLAGS = OCRDMA_CREATE_CQ_FLAGS_VALID |
OCRDMA_CREATE_CQ_FLAGS_EVENTABLE |
OCRDMA_CREATE_CQ_FLAGS_NODELAY
@@ -666,8 +697,7 @@ struct ocrdma_create_cq_cmd {
u32 pgsz_pgcnt;
u32 ev_cnt_flags;
u32 eqn;
- u16 cqe_count;
- u16 pd_id;
+ u32 pdid_cqecnt;
u32 rsvd6;
struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
};
@@ -678,6 +708,10 @@ struct ocrdma_create_cq {
};
enum {
+ OCRDMA_CREATE_CQ_CMD_PDID_SHIFT = 0x10
+};
+
+enum {
OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF
};
@@ -695,8 +729,8 @@ enum {
OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22,
OCRDMA_CREATE_MQ_CQ_ID_SHIFT = 16,
OCRDMA_CREATE_MQ_RING_SIZE_SHIFT = 16,
- OCRDMA_CREATE_MQ_VALID = Bit(31),
- OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0)
+ OCRDMA_CREATE_MQ_VALID = BIT(31),
+ OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = BIT(0)
};
struct ocrdma_create_mq_req {
@@ -747,7 +781,7 @@ enum {
OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT = 16,
OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT = 19,
OCRDMA_CREATE_QP_REQ_QPT_SHIFT = 29,
- OCRDMA_CREATE_QP_REQ_QPT_MASK = Bit(31) | Bit(30) | Bit(29),
+ OCRDMA_CREATE_QP_REQ_QPT_MASK = BIT(31) | BIT(30) | BIT(29),
OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT = 0,
OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK = 0xFFFF,
@@ -762,23 +796,23 @@ enum {
OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT,
OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT = 0,
- OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = Bit(0),
+ OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = BIT(0),
OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT = 1,
- OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = Bit(1),
+ OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = BIT(1),
OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT = 2,
- OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = Bit(2),
+ OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = BIT(2),
OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT = 3,
- OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = Bit(3),
+ OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = BIT(3),
OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT = 4,
- OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = Bit(4),
+ OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = BIT(4),
OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT = 5,
- OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = Bit(5),
+ OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = BIT(5),
OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT = 6,
- OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = Bit(6),
+ OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = BIT(6),
OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT = 7,
- OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = Bit(7),
+ OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = BIT(7),
OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT = 8,
- OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = Bit(8),
+ OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = BIT(8),
OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT = 16,
OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK = 0xFFFF <<
OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT,
@@ -891,7 +925,7 @@ enum {
OCRDMA_CREATE_QP_RSP_SQ_ID_MASK = 0xFFFF <<
OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT,
- OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = Bit(0),
+ OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = BIT(0),
OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT = 1,
OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK = 0x7FFF <<
OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT,
@@ -928,38 +962,38 @@ enum {
OCRDMA_MODIFY_QP_ID_SHIFT = 0,
OCRDMA_MODIFY_QP_ID_MASK = 0xFFFF,
- OCRDMA_QP_PARA_QPS_VALID = Bit(0),
- OCRDMA_QP_PARA_SQD_ASYNC_VALID = Bit(1),
- OCRDMA_QP_PARA_PKEY_VALID = Bit(2),
- OCRDMA_QP_PARA_QKEY_VALID = Bit(3),
- OCRDMA_QP_PARA_PMTU_VALID = Bit(4),
- OCRDMA_QP_PARA_ACK_TO_VALID = Bit(5),
- OCRDMA_QP_PARA_RETRY_CNT_VALID = Bit(6),
- OCRDMA_QP_PARA_RRC_VALID = Bit(7),
- OCRDMA_QP_PARA_RQPSN_VALID = Bit(8),
- OCRDMA_QP_PARA_MAX_IRD_VALID = Bit(9),
- OCRDMA_QP_PARA_MAX_ORD_VALID = Bit(10),
- OCRDMA_QP_PARA_RNT_VALID = Bit(11),
- OCRDMA_QP_PARA_SQPSN_VALID = Bit(12),
- OCRDMA_QP_PARA_DST_QPN_VALID = Bit(13),
- OCRDMA_QP_PARA_MAX_WQE_VALID = Bit(14),
- OCRDMA_QP_PARA_MAX_RQE_VALID = Bit(15),
- OCRDMA_QP_PARA_SGE_SEND_VALID = Bit(16),
- OCRDMA_QP_PARA_SGE_RECV_VALID = Bit(17),
- OCRDMA_QP_PARA_SGE_WR_VALID = Bit(18),
- OCRDMA_QP_PARA_INB_RDEN_VALID = Bit(19),
- OCRDMA_QP_PARA_INB_WREN_VALID = Bit(20),
- OCRDMA_QP_PARA_FLOW_LBL_VALID = Bit(21),
- OCRDMA_QP_PARA_BIND_EN_VALID = Bit(22),
- OCRDMA_QP_PARA_ZLKEY_EN_VALID = Bit(23),
- OCRDMA_QP_PARA_FMR_EN_VALID = Bit(24),
- OCRDMA_QP_PARA_INBAT_EN_VALID = Bit(25),
- OCRDMA_QP_PARA_VLAN_EN_VALID = Bit(26),
-
- OCRDMA_MODIFY_QP_FLAGS_RD = Bit(0),
- OCRDMA_MODIFY_QP_FLAGS_WR = Bit(1),
- OCRDMA_MODIFY_QP_FLAGS_SEND = Bit(2),
- OCRDMA_MODIFY_QP_FLAGS_ATOMIC = Bit(3)
+ OCRDMA_QP_PARA_QPS_VALID = BIT(0),
+ OCRDMA_QP_PARA_SQD_ASYNC_VALID = BIT(1),
+ OCRDMA_QP_PARA_PKEY_VALID = BIT(2),
+ OCRDMA_QP_PARA_QKEY_VALID = BIT(3),
+ OCRDMA_QP_PARA_PMTU_VALID = BIT(4),
+ OCRDMA_QP_PARA_ACK_TO_VALID = BIT(5),
+ OCRDMA_QP_PARA_RETRY_CNT_VALID = BIT(6),
+ OCRDMA_QP_PARA_RRC_VALID = BIT(7),
+ OCRDMA_QP_PARA_RQPSN_VALID = BIT(8),
+ OCRDMA_QP_PARA_MAX_IRD_VALID = BIT(9),
+ OCRDMA_QP_PARA_MAX_ORD_VALID = BIT(10),
+ OCRDMA_QP_PARA_RNT_VALID = BIT(11),
+ OCRDMA_QP_PARA_SQPSN_VALID = BIT(12),
+ OCRDMA_QP_PARA_DST_QPN_VALID = BIT(13),
+ OCRDMA_QP_PARA_MAX_WQE_VALID = BIT(14),
+ OCRDMA_QP_PARA_MAX_RQE_VALID = BIT(15),
+ OCRDMA_QP_PARA_SGE_SEND_VALID = BIT(16),
+ OCRDMA_QP_PARA_SGE_RECV_VALID = BIT(17),
+ OCRDMA_QP_PARA_SGE_WR_VALID = BIT(18),
+ OCRDMA_QP_PARA_INB_RDEN_VALID = BIT(19),
+ OCRDMA_QP_PARA_INB_WREN_VALID = BIT(20),
+ OCRDMA_QP_PARA_FLOW_LBL_VALID = BIT(21),
+ OCRDMA_QP_PARA_BIND_EN_VALID = BIT(22),
+ OCRDMA_QP_PARA_ZLKEY_EN_VALID = BIT(23),
+ OCRDMA_QP_PARA_FMR_EN_VALID = BIT(24),
+ OCRDMA_QP_PARA_INBAT_EN_VALID = BIT(25),
+ OCRDMA_QP_PARA_VLAN_EN_VALID = BIT(26),
+
+ OCRDMA_MODIFY_QP_FLAGS_RD = BIT(0),
+ OCRDMA_MODIFY_QP_FLAGS_WR = BIT(1),
+ OCRDMA_MODIFY_QP_FLAGS_SEND = BIT(2),
+ OCRDMA_MODIFY_QP_FLAGS_ATOMIC = BIT(3)
};
enum {
@@ -978,15 +1012,15 @@ enum {
OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK = 0xFFFF <<
OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT,
- OCRDMA_QP_PARAMS_FLAGS_FMR_EN = Bit(0),
- OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = Bit(1),
- OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = Bit(2),
- OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = Bit(3),
- OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = Bit(4),
+ OCRDMA_QP_PARAMS_FLAGS_FMR_EN = BIT(0),
+ OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = BIT(1),
+ OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = BIT(2),
+ OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = BIT(3),
+ OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = BIT(4),
OCRDMA_QP_PARAMS_STATE_SHIFT = 5,
- OCRDMA_QP_PARAMS_STATE_MASK = Bit(5) | Bit(6) | Bit(7),
- OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = Bit(8),
- OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = Bit(9),
+ OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7),
+ OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8),
+ OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9),
OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16,
OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF <<
OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT,
@@ -1231,7 +1265,6 @@ struct ocrdma_destroy_srq {
enum {
OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16),
- OCRDMA_PD_MAX_DPP_ENABLED_QP = 8,
OCRDMA_DPP_PAGE_SIZE = 4096
};
@@ -1242,7 +1275,7 @@ struct ocrdma_alloc_pd {
};
enum {
- OCRDMA_ALLOC_PD_RSP_DPP = Bit(16),
+ OCRDMA_ALLOC_PD_RSP_DPP = BIT(16),
OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT = 20,
OCRDMA_ALLOC_PD_RSP_PDID_MASK = 0xFFFF,
};
@@ -1274,18 +1307,18 @@ enum {
OCRDMA_ALLOC_LKEY_PD_ID_MASK = 0xFFFF,
OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT = 0,
- OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = Bit(0),
+ OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = BIT(0),
OCRDMA_ALLOC_LKEY_FMR_SHIFT = 1,
- OCRDMA_ALLOC_LKEY_FMR_MASK = Bit(1),
+ OCRDMA_ALLOC_LKEY_FMR_MASK = BIT(1),
OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT = 2,
- OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = Bit(2),
+ OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = BIT(2),
OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT = 3,
- OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = Bit(3),
+ OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = BIT(3),
OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT = 4,
- OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = Bit(4),
+ OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = BIT(4),
OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT = 5,
- OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = Bit(5),
- OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = Bit(6),
+ OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = BIT(5),
+ OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = BIT(6),
OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT = 6,
OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT = 16,
OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK = 0xFFFF <<
@@ -1344,21 +1377,21 @@ enum {
OCRDMA_REG_NSMR_HPAGE_SIZE_MASK = 0xFF <<
OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT,
OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT = 24,
- OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = Bit(24),
+ OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = BIT(24),
OCRDMA_REG_NSMR_ZB_SHIFT = 25,
- OCRDMA_REG_NSMR_ZB_SHIFT_MASK = Bit(25),
+ OCRDMA_REG_NSMR_ZB_SHIFT_MASK = BIT(25),
OCRDMA_REG_NSMR_REMOTE_INV_SHIFT = 26,
- OCRDMA_REG_NSMR_REMOTE_INV_MASK = Bit(26),
+ OCRDMA_REG_NSMR_REMOTE_INV_MASK = BIT(26),
OCRDMA_REG_NSMR_REMOTE_WR_SHIFT = 27,
- OCRDMA_REG_NSMR_REMOTE_WR_MASK = Bit(27),
+ OCRDMA_REG_NSMR_REMOTE_WR_MASK = BIT(27),
OCRDMA_REG_NSMR_REMOTE_RD_SHIFT = 28,
- OCRDMA_REG_NSMR_REMOTE_RD_MASK = Bit(28),
+ OCRDMA_REG_NSMR_REMOTE_RD_MASK = BIT(28),
OCRDMA_REG_NSMR_LOCAL_WR_SHIFT = 29,
- OCRDMA_REG_NSMR_LOCAL_WR_MASK = Bit(29),
+ OCRDMA_REG_NSMR_LOCAL_WR_MASK = BIT(29),
OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT = 30,
- OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = Bit(30),
+ OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = BIT(30),
OCRDMA_REG_NSMR_LAST_SHIFT = 31,
- OCRDMA_REG_NSMR_LAST_MASK = Bit(31)
+ OCRDMA_REG_NSMR_LAST_MASK = BIT(31)
};
struct ocrdma_reg_nsmr {
@@ -1385,7 +1418,7 @@ enum {
OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT,
OCRDMA_REG_NSMR_CONT_LAST_SHIFT = 31,
- OCRDMA_REG_NSMR_CONT_LAST_MASK = Bit(31)
+ OCRDMA_REG_NSMR_CONT_LAST_MASK = BIT(31)
};
struct ocrdma_reg_nsmr_cont {
@@ -1531,7 +1564,7 @@ struct ocrdma_delete_ah_tbl_rsp {
enum {
OCRDMA_EQE_VALID_SHIFT = 0,
- OCRDMA_EQE_VALID_MASK = Bit(0),
+ OCRDMA_EQE_VALID_MASK = BIT(0),
OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE,
OCRDMA_EQE_RESOURCE_ID_SHIFT = 16,
OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF <<
@@ -1589,11 +1622,11 @@ enum {
OCRDMA_CQE_UD_STATUS_MASK = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT,
OCRDMA_CQE_STATUS_SHIFT = 16,
OCRDMA_CQE_STATUS_MASK = 0xFF << OCRDMA_CQE_STATUS_SHIFT,
- OCRDMA_CQE_VALID = Bit(31),
- OCRDMA_CQE_INVALIDATE = Bit(30),
- OCRDMA_CQE_QTYPE = Bit(29),
- OCRDMA_CQE_IMM = Bit(28),
- OCRDMA_CQE_WRITE_IMM = Bit(27),
+ OCRDMA_CQE_VALID = BIT(31),
+ OCRDMA_CQE_INVALIDATE = BIT(30),
+ OCRDMA_CQE_QTYPE = BIT(29),
+ OCRDMA_CQE_IMM = BIT(28),
+ OCRDMA_CQE_WRITE_IMM = BIT(27),
OCRDMA_CQE_QTYPE_SQ = 0,
OCRDMA_CQE_QTYPE_RQ = 1,
OCRDMA_CQE_SRCQP_MASK = 0xFFFFFF
@@ -1737,8 +1770,8 @@ struct ocrdma_grh {
u16 rsvd;
} __packed;
-#define OCRDMA_AV_VALID Bit(7)
-#define OCRDMA_AV_VLAN_VALID Bit(1)
+#define OCRDMA_AV_VALID BIT(7)
+#define OCRDMA_AV_VLAN_VALID BIT(1)
struct ocrdma_av {
struct ocrdma_eth_vlan eth_hdr;
@@ -1896,12 +1929,62 @@ struct ocrdma_rdma_stats_resp {
struct ocrdma_rx_dbg_stats rx_dbg_stats;
} __packed;
+enum {
+ OCRDMA_HBA_ATTRB_EPROM_VER_LO_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_EPROM_VER_HI_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_EPROM_VER_HI_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_CDBLEN_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_ASIC_REV_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_ASIC_REV_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_GUID0_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_GUID0_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_GUID13_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_GUID14_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_GUID14_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_GUID15_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_GUID15_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PCNT_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_PCNT_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_LDTOUT_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_ISCSI_VER_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_ISCSI_VER_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_MFUNC_DEV_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_MFUNC_DEV_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_CV_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_HBA_ST_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_HBA_ST_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_MAX_DOMS_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_MAX_DOMS_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PTNUM_MASK = 0x3F000000,
+ OCRDMA_HBA_ATTRB_PTNUM_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_PT_MASK = 0xC0000000,
+ OCRDMA_HBA_ATTRB_PT_SHIFT = 0x1E,
+ OCRDMA_HBA_ATTRB_ISCSI_FET_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_ASIC_GEN_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_ASIC_GEN_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_PCI_VID_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_PCI_DID_MASK = 0xFFFF0000,
+ OCRDMA_HBA_ATTRB_PCI_DID_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PCI_SVID_MASK = 0xFFFF,
+ OCRDMA_HBA_ATTRB_PCI_SSID_MASK = 0xFFFF0000,
+ OCRDMA_HBA_ATTRB_PCI_SSID_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_PCI_BUSNUM_MASK = 0xFF,
+ OCRDMA_HBA_ATTRB_PCI_DEVNUM_MASK = 0xFF00,
+ OCRDMA_HBA_ATTRB_PCI_DEVNUM_SHIFT = 0x08,
+ OCRDMA_HBA_ATTRB_PCI_FUNCNUM_MASK = 0xFF0000,
+ OCRDMA_HBA_ATTRB_PCI_FUNCNUM_SHIFT = 0x10,
+ OCRDMA_HBA_ATTRB_IF_TYPE_MASK = 0xFF000000,
+ OCRDMA_HBA_ATTRB_IF_TYPE_SHIFT = 0x18,
+ OCRDMA_HBA_ATTRB_NETFIL_MASK = 0xFF
+};
struct mgmt_hba_attribs {
u8 flashrom_version_string[32];
u8 manufacturer_name[32];
u32 supported_modes;
- u32 rsvd0[3];
+ u32 rsvd_eprom_verhi_verlo;
+ u32 mbx_ds_ver;
+ u32 epfw_ds_ver;
u8 ncsi_ver_string[12];
u32 default_extended_timeout;
u8 controller_model_number[32];
@@ -1914,34 +1997,26 @@ struct mgmt_hba_attribs {
u8 driver_version_string[32];
u8 fw_on_flash_version_string[32];
u32 functionalities_supported;
- u16 max_cdblength;
- u8 asic_revision;
- u8 generational_guid[16];
- u8 hba_port_count;
- u16 default_link_down_timeout;
- u8 iscsi_ver_min_max;
- u8 multifunction_device;
- u8 cache_valid;
- u8 hba_status;
- u8 max_domains_supported;
- u8 phy_port;
+ u32 guid0_asicrev_cdblen;
+ u8 generational_guid[12];
+ u32 portcnt_guid15;
+ u32 mfuncdev_iscsi_ldtout;
+ u32 ptpnum_maxdoms_hbast_cv;
u32 firmware_post_status;
u32 hba_mtu[8];
- u32 rsvd1[4];
+ u32 res_asicgen_iscsi_feaures;
+ u32 rsvd1[3];
};
struct mgmt_controller_attrib {
struct mgmt_hba_attribs hba_attribs;
- u16 pci_vendor_id;
- u16 pci_device_id;
- u16 pci_sub_vendor_id;
- u16 pci_sub_system_id;
- u8 pci_bus_number;
- u8 pci_device_number;
- u8 pci_function_number;
- u8 interface_type;
- u64 unique_identifier;
- u32 rsvd0[5];
+ u32 pci_did_vid;
+ u32 pci_ssid_svid;
+ u32 ityp_fnum_devnum_bnum;
+ u32 uid_hi;
+ u32 uid_lo;
+ u32 res_nnetfil;
+ u32 rsvd0[4];
};
struct ocrdma_get_ctrl_attribs_rsp {
@@ -1949,5 +2024,79 @@ struct ocrdma_get_ctrl_attribs_rsp {
struct mgmt_controller_attrib ctrl_attribs;
};
+#define OCRDMA_SUBSYS_DCBX 0x10
+
+enum OCRDMA_DCBX_OPCODE {
+ OCRDMA_CMD_GET_DCBX_CONFIG = 0x01
+};
+
+enum OCRDMA_DCBX_PARAM_TYPE {
+ OCRDMA_PARAMETER_TYPE_ADMIN = 0x00,
+ OCRDMA_PARAMETER_TYPE_OPER = 0x01,
+ OCRDMA_PARAMETER_TYPE_PEER = 0x02
+};
+
+enum OCRDMA_DCBX_APP_PROTO {
+ OCRDMA_APP_PROTO_ROCE = 0x8915
+};
+
+enum OCRDMA_DCBX_PROTO {
+ OCRDMA_PROTO_SELECT_L2 = 0x00,
+ OCRDMA_PROTO_SELECT_L4 = 0x01
+};
+
+enum OCRDMA_DCBX_APP_PARAM {
+ OCRDMA_APP_PARAM_APP_PROTO_MASK = 0xFFFF,
+ OCRDMA_APP_PARAM_PROTO_SEL_MASK = 0xFF,
+ OCRDMA_APP_PARAM_PROTO_SEL_SHIFT = 0x10,
+ OCRDMA_APP_PARAM_VALID_MASK = 0xFF,
+ OCRDMA_APP_PARAM_VALID_SHIFT = 0x18
+};
+
+enum OCRDMA_DCBX_STATE_FLAGS {
+ OCRDMA_STATE_FLAG_ENABLED = 0x01,
+ OCRDMA_STATE_FLAG_ADDVERTISED = 0x02,
+ OCRDMA_STATE_FLAG_WILLING = 0x04,
+ OCRDMA_STATE_FLAG_SYNC = 0x08,
+ OCRDMA_STATE_FLAG_UNSUPPORTED = 0x40000000,
+ OCRDMA_STATE_FLAG_NEG_FAILD = 0x80000000
+};
+
+enum OCRDMA_TCV_AEV_OPV_ST {
+ OCRDMA_DCBX_TC_SUPPORT_MASK = 0xFF,
+ OCRDMA_DCBX_TC_SUPPORT_SHIFT = 0x18,
+ OCRDMA_DCBX_APP_ENTRY_SHIFT = 0x10,
+ OCRDMA_DCBX_OP_PARAM_SHIFT = 0x08,
+ OCRDMA_DCBX_STATE_MASK = 0xFF
+};
+
+struct ocrdma_app_parameter {
+ u32 valid_proto_app;
+ u32 oui;
+ u32 app_prio[2];
+};
+
+struct ocrdma_dcbx_cfg {
+ u32 tcv_aev_opv_st;
+ u32 tc_state;
+ u32 pfc_state;
+ u32 qcn_state;
+ u32 appl_state;
+ u32 ll_state;
+ u32 tc_bw[2];
+ u32 tc_prio[8];
+ u32 pfc_prio[2];
+ struct ocrdma_app_parameter app_param[15];
+};
+
+struct ocrdma_get_dcbx_cfg_req {
+ struct ocrdma_mbx_hdr hdr;
+ u32 param_type;
+} __packed;
+
+struct ocrdma_get_dcbx_cfg_rsp {
+ struct ocrdma_mbx_rsp hdr;
+ struct ocrdma_dcbx_cfg cfg;
+} __packed;
#endif /* __OCRDMA_SLI_H__ */
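The driver-private Bit() macro deleted at the top of this header is replaced throughout by the generic BIT() from <linux/bitops.h>:

    #define BIT(nr)  (1UL << (nr))

One subtlety worth noting: Bit() expanded to a signed int shift while BIT() yields unsigned long; all users in this header stay within 32 bits, so the conversion is behavior-neutral.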
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 8bae8718fa53..4c68305ee781 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -69,11 +69,11 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
- attr->max_mr_size = ~0ull;
+ attr->max_mr_size = dev->attr.max_mr_size;
attr->page_size_cap = 0xffff000;
attr->vendor_id = dev->nic_info.pdev->vendor;
attr->vendor_part_id = dev->nic_info.pdev->device;
- attr->hw_ver = 0;
+ attr->hw_ver = dev->asic_id;
attr->max_qp = dev->attr.max_qp;
attr->max_ah = OCRDMA_MAX_AH;
attr->max_qp_wr = dev->attr.max_wqe;
@@ -101,7 +101,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
attr->max_srq_sge = dev->attr.max_srq_sge;
attr->max_srq_wr = dev->attr.max_rqe;
attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
- attr->max_fast_reg_page_list_len = 0;
+ attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr;
attr->max_pkeys = 1;
return 0;
}
@@ -268,7 +268,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
pd->dpp_enabled =
ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
pd->num_dpp_qp =
- pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
+ pd->dpp_enabled ? (dev->nic_info.db_page_size /
+ dev->attr.wqe_size) : 0;
}
retry:
@@ -328,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
struct ocrdma_pd *pd = uctx->cntxt_pd;
struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
- BUG_ON(uctx->pd_in_use);
+ if (uctx->pd_in_use) {
+ pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
+ __func__, dev->id, pd->id);
+ }
uctx->cntxt_pd = NULL;
status = _ocrdma_dealloc_pd(dev, pd);
return status;
@@ -384,7 +388,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
memset(&resp, 0, sizeof(resp));
resp.ah_tbl_len = ctx->ah_tbl.len;
- resp.ah_tbl_page = ctx->ah_tbl.pa;
+ resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
if (status)
@@ -843,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
+
+ /* Don't stop cleanup, in case FW is unresponsive */
+ if (dev->mqe_ctx.fw_error_state) {
+ status = 0;
+ pr_err("%s(%d) fw not responding.\n",
+ __func__, dev->id);
+ }
return status;
}
@@ -859,7 +870,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
uresp.page_size = PAGE_ALIGN(cq->len);
uresp.num_pages = 1;
uresp.max_hw_cqe = cq->max_hw_cqe;
- uresp.page_addr[0] = cq->pa;
+ uresp.page_addr[0] = virt_to_phys(cq->va);
uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
uresp.db_page_size = dev->nic_info.db_page_size;
uresp.phase_change = cq->phase_change ? 1 : 0;
@@ -1112,13 +1123,13 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
uresp.sq_dbid = qp->sq.dbid;
uresp.num_sq_pages = 1;
uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
- uresp.sq_page_addr[0] = qp->sq.pa;
+ uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va);
uresp.num_wqe_allocated = qp->sq.max_cnt;
if (!srq) {
uresp.rq_dbid = qp->rq.dbid;
uresp.num_rq_pages = 1;
uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
- uresp.rq_page_addr[0] = qp->rq.pa;
+ uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va);
uresp.num_rqe_allocated = qp->rq.max_cnt;
}
uresp.db_page_addr = usr_db;
@@ -1669,7 +1680,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
memset(&uresp, 0, sizeof(uresp));
uresp.rq_dbid = srq->rq.dbid;
uresp.num_rq_pages = 1;
- uresp.rq_page_addr[0] = srq->rq.pa;
+ uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
uresp.rq_page_size = srq->rq.len;
uresp.db_page_addr = dev->nic_info.unmapped_db +
(srq->pd->id * dev->nic_info.db_page_size);
@@ -2054,6 +2065,13 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
while (wr) {
+ if (qp->qp_type == IB_QPT_UD &&
+ (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM)) {
+ *bad_wr = wr;
+ status = -EINVAL;
+ break;
+ }
if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
wr->num_sge > qp->sq.max_sges) {
*bad_wr = wr;
@@ -2488,6 +2506,11 @@ static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
*stop = true;
expand = false;
}
+ } else if (is_hw_sq_empty(qp)) {
+ /* Do nothing */
+ expand = false;
+ *polled = false;
+ *stop = false;
} else {
*polled = true;
expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
@@ -2593,6 +2616,11 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
*stop = true;
expand = false;
}
+ } else if (is_hw_rq_empty(qp)) {
+ /* Do nothing */
+ expand = false;
+ *polled = false;
+ *stop = false;
} else {
*polled = true;
expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
@@ -2818,11 +2846,9 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
if (cq->first_arm) {
ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
cq->first_arm = false;
- goto skip_defer;
}
- cq->deferred_arm = true;
-skip_defer:
+ cq->deferred_arm = true;
cq->deferred_sol = sol_needed;
spin_unlock_irqrestore(&cq->cq_lock, flags);
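
The ocrdma_post_send() hunk above rejects any work request other than
SEND/SEND_WITH_IMM on UD QPs before touching the queue; the same guard
in isolation, as a sketch (the standalone helper is for illustration
only, the driver open-codes it in the posting loop):

static int ocrdma_check_ud_opcode(struct ib_qp *ibqp,
				  struct ib_send_wr *wr,
				  struct ib_send_wr **bad_wr)
{
	/* UD QPs may only transmit SEND and SEND_WITH_IMM */
	if (ibqp->qp_type == IB_QPT_UD &&
	    wr->opcode != IB_WR_SEND &&
	    wr->opcode != IB_WR_SEND_WITH_IMM) {
		*bad_wr = wr;	/* fail the chain at the offending WR */
		return -EINVAL;
	}
	return 0;
}
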
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
index 799a0c3bffc4..6abd3ed3cd51 100644
--- a/drivers/infiniband/hw/qib/qib_debugfs.c
+++ b/drivers/infiniband/hw/qib/qib_debugfs.c
@@ -193,6 +193,7 @@ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
struct qib_qp_iter *iter;
loff_t n = *pos;
+ rcu_read_lock();
iter = qib_qp_iter_init(s->private);
if (!iter)
return NULL;
@@ -224,7 +225,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
{
- /* nothing for now */
+ rcu_read_unlock();
}
static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
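
The qib_debugfs hunks above move RCU locking to the seq_file boundaries:
->start takes rcu_read_lock() and ->stop drops it, so ->next/->show may
rcu_dereference() QP pointers in between without per-QP refcounting.
The pattern, sketched with a hypothetical iterator helper:

static void *qp_seq_start(struct seq_file *s, loff_t *pos)
{
	rcu_read_lock();	/* held across the whole iteration */
	return first_qp(s->private, *pos);	/* hypothetical helper */
}

static void qp_seq_stop(struct seq_file *s, void *v)
{
	rcu_read_unlock();	/* pairs with qp_seq_start() */
}
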
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index cab610ccd50e..81854586c081 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -89,7 +89,6 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- *dentry = NULL;
mutex_lock(&parent->d_inode->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 8d3c78ddc906..729da39c49ed 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1222,7 +1222,7 @@ static int qib_init_one(struct pci_dev *, const struct pci_device_id *);
#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: "
#define PFX QIB_DRV_NAME ": "
-static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = {
+static const struct pci_device_id qib_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) },
{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) },
{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) },
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 22c720e5740d..636be117b578 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2476,7 +2476,7 @@ int qib_create_agents(struct qib_ibdev *dev)
ibp = &dd->pport[p].ibport_data;
agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
NULL, 0, send_handler,
- NULL, NULL);
+ NULL, NULL, 0);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 7fcc150d603c..6ddc0264aad2 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1325,7 +1325,6 @@ int qib_qp_iter_next(struct qib_qp_iter *iter)
struct qib_qp *pqp = iter->qp;
struct qib_qp *qp;
- rcu_read_lock();
for (; n < dev->qp_table_size; n++) {
if (pqp)
qp = rcu_dereference(pqp->next);
@@ -1333,18 +1332,11 @@ int qib_qp_iter_next(struct qib_qp_iter *iter)
qp = rcu_dereference(dev->qp_table[n]);
pqp = qp;
if (qp) {
- if (iter->qp)
- atomic_dec(&iter->qp->refcount);
- atomic_inc(&qp->refcount);
- rcu_read_unlock();
iter->qp = qp;
iter->n = n;
return 0;
}
}
- rcu_read_unlock();
- if (iter->qp)
- atomic_dec(&iter->qp->refcount);
return ret;
}
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 2bc1d2b96298..74f90b2619f6 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -52,7 +52,7 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages,
* Call with current->mm->mmap_sem held.
*/
static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
- struct page **p, struct vm_area_struct **vma)
+ struct page **p)
{
unsigned long lock_limit;
size_t got;
@@ -69,7 +69,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
ret = get_user_pages(current, current->mm,
start_page + got * PAGE_SIZE,
num_pages - got, 1, 1,
- p + got, vma);
+ p + got, NULL);
if (ret < 0)
goto bail_release;
}
@@ -136,7 +136,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
down_write(&current->mm->mmap_sem);
- ret = __qib_get_user_pages(start_page, num_pages, p, NULL);
+ ret = __qib_get_user_pages(start_page, num_pages, p);
up_write(&current->mm->mmap_sem);
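
The vmas argument of get_user_pages() is optional, which is what lets the
qib hunk drop the always-NULL parameter it was threading through. A sketch
of the simplified call against this kernel's signature (the helper is
hypothetical; the write lock mirrors qib, which also updates
mm->pinned_vm under it):

static int pin_user_buffer(unsigned long start, unsigned long npages,
			   struct page **pages)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm, start, npages,
			     1, 1, pages, NULL);	/* vmas not needed */
	up_write(&current->mm->mmap_sem);
	return ret;
}
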
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index fb6d026f92cd..0d0f98695d53 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -490,7 +490,7 @@ out:
/* Start of PCI section */
-static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = {
+static const struct pci_device_id usnic_ib_pci_ids[] = {
{PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)},
{0,}
};
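
Both the qib and usnic hunks retire the deprecated
DEFINE_PCI_DEVICE_TABLE() macro in favor of spelling out what it expanded
to: a const, sentinel-terminated array of struct pci_device_id. The
idiom, with illustrative IDs:

static const struct pci_device_id example_pci_tbl[] = {
	{ PCI_DEVICE(0x1077, 0x7220) },	/* vendor/device IDs illustrative */
	{ 0, }				/* zeroed sentinel ends the table */
};
MODULE_DEVICE_TABLE(pci, example_pci_tbl);
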
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 801a1d6937e4..417de1f32960 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -507,7 +507,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
if (err)
goto out_free_dev;
- if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) {
+ if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
usnic_err("IOMMU of %s does not support cache coherency\n",
dev_name(dev));
err = -EINVAL;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index c639f90cfda4..d7562beb5423 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -86,7 +86,6 @@ enum {
IPOIB_FLAG_INITIALIZED = 1,
IPOIB_FLAG_ADMIN_UP = 2,
IPOIB_PKEY_ASSIGNED = 3,
- IPOIB_PKEY_STOP = 4,
IPOIB_FLAG_SUBINTERFACE = 5,
IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
@@ -132,6 +131,12 @@ struct ipoib_cb {
u8 hwaddr[INFINIBAND_ALEN];
};
+static inline struct ipoib_cb *ipoib_skb_cb(const struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb));
+ return (struct ipoib_cb *)skb->cb;
+}
+
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
@@ -312,7 +317,6 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
- struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light;
@@ -473,10 +477,11 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
-int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_open(struct net_device *dev, int flush);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
@@ -532,8 +537,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf);
void ipoib_setup(struct net_device *dev);
-void ipoib_pkey_poll(struct work_struct *work);
-int ipoib_pkey_dev_delay_open(struct net_device *dev);
+void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
void ipoib_set_ethtool_ops(struct net_device *dev);
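
The new ipoib_skb_cb() accessor above replaces open-coded casts of
skb->cb and lets BUILD_BUG_ON() prove at compile time that the private
struct fits in the 48-byte skb->cb scratch area. The same pattern for any
skb control block, sketched with a hypothetical struct:

struct my_cb {				/* hypothetical per-skb state */
	u64	cookie;
	u8	hwaddr[20];
};

static inline struct my_cb *my_skb_cb(const struct sk_buff *skb)
{
	/* build fails if my_cb ever outgrows skb->cb */
	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct my_cb));
	return (struct my_cb *)skb->cb;
}
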
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 50061854616e..6bd5740e2691 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -281,10 +281,8 @@ void ipoib_delete_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- if (priv->mcg_dentry)
- debugfs_remove(priv->mcg_dentry);
- if (priv->path_dentry)
- debugfs_remove(priv->path_dentry);
+ debugfs_remove(priv->mcg_dentry);
+ debugfs_remove(priv->path_dentry);
}
int ipoib_register_debugfs(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 6a7003ddb0be..72626c348174 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -664,17 +664,18 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
drain_tx_cq((struct net_device *)ctx);
}
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_open(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
- if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
- ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ ipoib_pkey_dev_check_presence(dev);
+
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+ ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+ (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
return -1;
}
- set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = ipoib_init_qp(dev);
if (ret) {
@@ -705,16 +706,17 @@ int ipoib_ib_dev_open(struct net_device *dev)
dev_stop:
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
napi_enable(&priv->napi);
- ipoib_ib_dev_stop(dev, 1);
+ ipoib_ib_dev_stop(dev, flush);
return -1;
}
-static void ipoib_pkey_dev_check_presence(struct net_device *dev)
+void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- u16 pkey_index = 0;
- if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
+ if (!(priv->pkey & 0x7fff) ||
+ ib_find_pkey(priv->ca, priv->port, priv->pkey,
+ &priv->pkey_index))
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
else
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
@@ -745,14 +747,6 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
netif_carrier_off(dev);
- /* Shutdown the P_Key thread if still active */
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- mutex_lock(&pkey_mutex);
- set_bit(IPOIB_PKEY_STOP, &priv->flags);
- cancel_delayed_work_sync(&priv->pkey_poll_task);
- mutex_unlock(&pkey_mutex);
- }
-
ipoib_mcast_stop_thread(dev, flush);
ipoib_mcast_dev_flush(dev);
@@ -924,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
(unsigned long) dev);
if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev)) {
+ if (ipoib_ib_dev_open(dev, 1)) {
ipoib_transport_dev_cleanup(dev);
return -ENODEV;
}
@@ -966,13 +960,27 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
return 1;
}
+/*
+ * Returns 1 if the pkey is present at the same index as before,
+ * 0 if it moved to a different slot or is no longer present.
+ */
+static inline int update_child_pkey(struct ipoib_dev_priv *priv)
+{
+ u16 old_index = priv->pkey_index;
+
+ priv->pkey_index = 0;
+ ipoib_pkey_dev_check_presence(priv->dev);
+
+ if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
+ (old_index == priv->pkey_index))
+ return 1;
+ return 0;
+}
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
enum ipoib_flush_level level)
{
struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
- u16 new_index;
int result;
down_read(&priv->vlan_rwsem);
@@ -986,16 +994,20 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
up_read(&priv->vlan_rwsem);
- if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
- /* for non-child devices must check/update the pkey value here */
- if (level == IPOIB_FLUSH_HEAVY &&
- !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
- update_parent_pkey(priv);
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
+ level != IPOIB_FLUSH_HEAVY) {
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
+ /* interface is down. update pkey and leave. */
+ if (level == IPOIB_FLUSH_HEAVY) {
+ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
+ update_parent_pkey(priv);
+ else
+ update_child_pkey(priv);
+ }
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
return;
}
@@ -1005,20 +1017,13 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
* (parent) devices should always takes what present in pkey index 0
*/
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
- if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
- ipoib_ib_dev_down(dev, 0);
- ipoib_ib_dev_stop(dev, 0);
- if (ipoib_pkey_dev_delay_open(dev))
- return;
- }
- /* restart QP only if P_Key index is changed */
- if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
- new_index == priv->pkey_index) {
+ result = update_child_pkey(priv);
+ if (result) {
+ /* restart QP only if P_Key index is changed */
ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
return;
}
- priv->pkey_index = new_index;
+
} else {
result = update_parent_pkey(priv);
/* restart QP only if P_Key value changed */
@@ -1038,8 +1043,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
ipoib_ib_dev_down(dev, 0);
if (level == IPOIB_FLUSH_HEAVY) {
- ipoib_ib_dev_stop(dev, 0);
- ipoib_ib_dev_open(dev);
+ if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ ipoib_ib_dev_stop(dev, 0);
+ if (ipoib_ib_dev_open(dev, 0) != 0)
+ return;
+ if (netif_queue_stopped(dev))
+ netif_start_queue(dev);
}
/*
@@ -1094,54 +1103,4 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
ipoib_transport_dev_cleanup(dev);
}
-/*
- * Delayed P_Key Assigment Interim Support
- *
- * The following is initial implementation of delayed P_Key assigment
- * mechanism. It is using the same approach implemented for the multicast
- * group join. The single goal of this implementation is to quickly address
- * Bug #2507. This implementation will probably be removed when the P_Key
- * change async notification is available.
- */
-
-void ipoib_pkey_poll(struct work_struct *work)
-{
- struct ipoib_dev_priv *priv =
- container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
- struct net_device *dev = priv->dev;
-
- ipoib_pkey_dev_check_presence(dev);
-
- if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
- ipoib_open(dev);
- else {
- mutex_lock(&pkey_mutex);
- if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->pkey_poll_task,
- HZ);
- mutex_unlock(&pkey_mutex);
- }
-}
-
-int ipoib_pkey_dev_delay_open(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- /* Look for the interface pkey value in the IB Port P_Key table and */
- /* set the interface pkey assigment flag */
- ipoib_pkey_dev_check_presence(dev);
- /* P_Key value not assigned yet - start polling */
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- mutex_lock(&pkey_mutex);
- clear_bit(IPOIB_PKEY_STOP, &priv->flags);
- queue_delayed_work(ipoib_workqueue,
- &priv->pkey_poll_task,
- HZ);
- mutex_unlock(&pkey_mutex);
- return 1;
- }
-
- return 0;
-}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 4e675f4fecc9..58b5aa3b6f2d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,11 +108,11 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
- if (ipoib_pkey_dev_delay_open(dev))
- return 0;
-
- if (ipoib_ib_dev_open(dev))
+ if (ipoib_ib_dev_open(dev, 1)) {
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+ return 0;
goto err_disable;
+ }
if (ipoib_ib_dev_up(dev))
goto err_stop;
@@ -716,7 +716,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_neigh *neigh;
- struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
+ struct ipoib_cb *cb = ipoib_skb_cb(skb);
struct ipoib_header *header;
unsigned long flags;
@@ -813,7 +813,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
const void *daddr, const void *saddr, unsigned len)
{
struct ipoib_header *header;
- struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
+ struct ipoib_cb *cb = ipoib_skb_cb(skb);
header = (struct ipoib_header *) skb_push(skb, sizeof *header);
@@ -1364,7 +1364,7 @@ void ipoib_setup(struct net_device *dev)
dev->tx_queue_len = ipoib_sendq_size * 2;
dev->features = (NETIF_F_VLAN_CHALLENGED |
NETIF_F_HIGHDMA);
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
@@ -1379,7 +1379,6 @@ void ipoib_setup(struct net_device *dev)
INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
- INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index d4e005720d01..ffb83b5f7e80 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -529,21 +529,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
port_attr.state);
return;
}
+ priv->local_lid = port_attr.lid;
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
ipoib_warn(priv, "ib_query_gid() failed\n");
else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
- {
- struct ib_port_attr attr;
-
- if (!ib_query_port(priv->ca, priv->port, &attr))
- priv->local_lid = attr.lid;
- else
- ipoib_warn(priv, "ib_query_port failed\n");
- }
-
if (!priv->broadcast) {
struct ipoib_mcast *broadcast;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index eb7973957a6e..f42ab14105ac 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -83,7 +83,7 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
int iser_debug_level = 0;
bool iser_pi_enable = false;
-int iser_pi_guard = 0;
+int iser_pi_guard = 1;
MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
MODULE_LICENSE("Dual BSD/GPL");
@@ -97,14 +97,24 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644);
MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
module_param_named(pi_guard, iser_pi_guard, int, 0644);
-MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)");
static struct workqueue_struct *release_wq;
struct iser_global ig;
+/**
+ * iscsi_iser_recv() - Process a successful recv completion
+ * @conn: iscsi connection
+ * @hdr: iscsi header
+ * @rx_data: buffer containing receive data payload
+ * @rx_data_len: length of rx_data
+ *
+ * Notes: In case of data length errors or iscsi PDU completion failures
+ * this routine will signal the iscsi layer of connection failure.
+ */
void
-iscsi_iser_recv(struct iscsi_conn *conn,
- struct iscsi_hdr *hdr, char *rx_data, int rx_data_len)
+iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+ char *rx_data, int rx_data_len)
{
int rc = 0;
int datalen;
@@ -135,20 +145,30 @@ error:
iscsi_conn_failure(conn, rc);
}
-static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
+/**
+ * iscsi_iser_pdu_alloc() - allocate an iscsi-iser PDU
+ * @task: iscsi task
+ * @opcode: iscsi command opcode
+ *
+ * Notes: This routine can't fail; it just assigns the iscsi task
+ * hdr and max hdr size.
+ */
+static int
+iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
{
struct iscsi_iser_task *iser_task = task->dd_data;
task->hdr = (struct iscsi_hdr *)&iser_task->desc.iscsi_header;
task->hdr_max = sizeof(iser_task->desc.iscsi_header);
+
return 0;
}
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc)
{
- struct iser_conn *ib_conn = task->conn->dd_data;
- struct iser_device *device = ib_conn->device;
+ struct iser_conn *iser_conn = task->conn->dd_data;
+ struct iser_device *device = iser_conn->ib_conn.device;
struct iscsi_iser_task *iser_task = task->dd_data;
u64 dma_addr;
@@ -162,14 +182,18 @@ int iser_initialize_task_headers(struct iscsi_task *task,
tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
tx_desc->tx_sg[0].lkey = device->mr->lkey;
- iser_task->ib_conn = ib_conn;
+ iser_task->iser_conn = iser_conn;
return 0;
}
+
/**
- * iscsi_iser_task_init - Initialize task
+ * iscsi_iser_task_init() - Initialize iscsi-iser task
* @task: iscsi task
*
* Initialize the task for the scsi command or mgmt command.
+ *
+ * Return: Returns zero on success or -ENOMEM when failing
+ * to init task headers (dma mapping error).
*/
static int
iscsi_iser_task_init(struct iscsi_task *task)
@@ -191,7 +215,7 @@ iscsi_iser_task_init(struct iscsi_task *task)
}
/**
- * iscsi_iser_mtask_xmit - xmit management(immediate) task
+ * iscsi_iser_mtask_xmit() - xmit management (immediate) task
* @conn: iscsi connection
* @task: task management task
*
@@ -249,6 +273,12 @@ iscsi_iser_task_xmit_unsol_data_exit:
return error;
}
+/**
+ * iscsi_iser_task_xmit() - xmit iscsi-iser task
+ * @task: iscsi task
+ *
+ * Return: zero on success or the propagated error code on failure.
+ */
static int
iscsi_iser_task_xmit(struct iscsi_task *task)
{
@@ -286,12 +316,24 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
return error;
}
+/**
+ * iscsi_iser_cleanup_task() - cleanup an iscsi-iser task
+ * @task: iscsi task
+ *
+ * Notes: In case the RDMA device is already NULL (it might have
+ * been removed in a DEVICE_REMOVAL CM event) this routine bails out
+ * without doing dma unmapping.
+ */
static void iscsi_iser_cleanup_task(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *tx_desc = &iser_task->desc;
- struct iser_conn *ib_conn = task->conn->dd_data;
- struct iser_device *device = ib_conn->device;
+ struct iser_conn *iser_conn = task->conn->dd_data;
+ struct iser_device *device = iser_conn->ib_conn.device;
+
+ /* DEVICE_REMOVAL event might have already released the device */
+ if (!device)
+ return;
ib_dma_unmap_single(device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -306,7 +348,20 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
}
}
-static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
+/**
+ * iscsi_iser_check_protection() - check protection information status of task.
+ * @task: iscsi task
+ * @sector: error sector if exists (output)
+ *
+ * Return: zero if no data-integrity errors have occurred
+ * 0x1: data-integrity error occurred in the guard-block
+ * 0x2: data-integrity error occurred in the reference tag
+ * 0x3: data-integrity error occurred in the application tag
+ *
+ * In addition, the error sector is marked.
+ */
+static u8
+iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
{
struct iscsi_iser_task *iser_task = task->dd_data;
@@ -318,8 +373,17 @@ static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
sector);
}
+/**
+ * iscsi_iser_conn_create() - create a new iscsi-iser connection
+ * @cls_session: iscsi class session
+ * @conn_idx: connection index within the session (for MCS)
+ *
+ * Return: iscsi_cls_conn when iscsi_conn_setup succeeds or NULL
+ * otherwise.
+ */
static struct iscsi_cls_conn *
-iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
+iscsi_iser_conn_create(struct iscsi_cls_session *cls_session,
+ uint32_t conn_idx)
{
struct iscsi_conn *conn;
struct iscsi_cls_conn *cls_conn;
@@ -338,14 +402,25 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
return cls_conn;
}
+/**
+ * iscsi_iser_conn_bind() - bind iscsi and iser connection structures
+ * @cls_session: iscsi class session
+ * @cls_conn: iscsi class connection
+ * @transport_eph: transport end-point handle
+ * @is_leading: indicate if this is the session leading connection (MCS)
+ *
+ * Return: zero on success, the iscsi_conn_bind error code if it fails,
+ * and -EINVAL in case the end-point doesn't exist anymore or the iser
+ * connection state is not UP (teardown already started).
+ */
static int
iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
- struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
+ struct iscsi_cls_conn *cls_conn,
+ uint64_t transport_eph,
int is_leading)
{
struct iscsi_conn *conn = cls_conn->dd_data;
- struct iscsi_session *session;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
struct iscsi_endpoint *ep;
int error;
@@ -361,56 +436,100 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
(unsigned long long)transport_eph);
return -EINVAL;
}
- ib_conn = ep->dd_data;
+ iser_conn = ep->dd_data;
+
+ mutex_lock(&iser_conn->state_mutex);
+ if (iser_conn->state != ISER_CONN_UP) {
+ error = -EINVAL;
+ iser_err("iser_conn %p state is %d, teardown started\n",
+ iser_conn, iser_conn->state);
+ goto out;
+ }
- session = conn->session;
- if (iser_alloc_rx_descriptors(ib_conn, session))
- return -ENOMEM;
+ error = iser_alloc_rx_descriptors(iser_conn, conn->session);
+ if (error)
+ goto out;
/* binds the iSER connection retrieved from the previously
* connected ep_handle to the iSCSI layer connection. exchanges
* connection pointers */
- iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn);
+ iser_info("binding iscsi conn %p to iser_conn %p\n", conn, iser_conn);
- conn->dd_data = ib_conn;
- ib_conn->iscsi_conn = conn;
+ conn->dd_data = iser_conn;
+ iser_conn->iscsi_conn = conn;
- return 0;
+out:
+ mutex_unlock(&iser_conn->state_mutex);
+ return error;
}
+/**
+ * iscsi_iser_conn_start() - start iscsi-iser connection
+ * @cls_conn: iscsi class connection
+ *
+ * Notes: Here iser initializes (or re-initializes) stop_completion since
+ * from this point on iscsi must call conn_stop in session/connection
+ * teardown, so the iser transport must wait for it.
+ */
static int
iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
{
struct iscsi_conn *iscsi_conn;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
iscsi_conn = cls_conn->dd_data;
- ib_conn = iscsi_conn->dd_data;
- reinit_completion(&ib_conn->stop_completion);
+ iser_conn = iscsi_conn->dd_data;
+ reinit_completion(&iser_conn->stop_completion);
return iscsi_conn_start(cls_conn);
}
+/**
+ * iscsi_iser_conn_stop() - stop iscsi-iser connection
+ * @cls_conn: iscsi class connection
+ * @flag: indicate if recover or terminate (passed as is)
+ *
+ * Notes: Calling iscsi_conn_stop might theoretically race with
+ * DEVICE_REMOVAL event and dereference a previously freed RDMA device
+ * handle, so we call it under the iser state lock to protect against
+ * this kind of race.
+ */
static void
iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
struct iscsi_conn *conn = cls_conn->dd_data;
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
- iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn);
- iscsi_conn_stop(cls_conn, flag);
+ iser_info("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn);
/*
* Userspace may have goofed up and not bound the connection or
* might have only partially setup the connection.
*/
- if (ib_conn) {
+ if (iser_conn) {
+ mutex_lock(&iser_conn->state_mutex);
+ iscsi_conn_stop(cls_conn, flag);
+ iser_conn_terminate(iser_conn);
+
+ /* unbind */
+ iser_conn->iscsi_conn = NULL;
conn->dd_data = NULL;
- complete(&ib_conn->stop_completion);
+
+ complete(&iser_conn->stop_completion);
+ mutex_unlock(&iser_conn->state_mutex);
+ } else {
+ iscsi_conn_stop(cls_conn, flag);
}
}
-static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
+/**
+ * iscsi_iser_session_destroy() - destroy iscsi-iser session
+ * @cls_session: iscsi class session
+ *
+ * Removes and frees the iscsi host.
+ */
+static void
+iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
{
struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
@@ -430,6 +549,16 @@ iser_dif_prot_caps(int prot_caps)
SHOST_DIX_TYPE3_PROTECTION : 0);
}
+/**
+ * iscsi_iser_session_create() - create an iscsi-iser session
+ * @ep: iscsi end-point handle
+ * @cmds_max: maximum commands in this session
+ * @qdepth: session command queue depth
+ * @initial_cmdsn: initiator command sequence number
+ *
+ * Allocates and adds a scsi host, exposes DIF support if it
+ * exists, and sets up an iscsi session.
+ */
static struct iscsi_cls_session *
iscsi_iser_session_create(struct iscsi_endpoint *ep,
uint16_t cmds_max, uint16_t qdepth,
@@ -438,7 +567,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
struct iscsi_cls_session *cls_session;
struct iscsi_session *session;
struct Scsi_Host *shost;
- struct iser_conn *ib_conn = NULL;
+ struct iser_conn *iser_conn = NULL;
+ struct ib_conn *ib_conn;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
@@ -455,7 +585,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
* the leading conn's ep so this will be NULL;
*/
if (ep) {
- ib_conn = ep->dd_data;
+ iser_conn = ep->dd_data;
+ ib_conn = &iser_conn->ib_conn;
if (ib_conn->pi_support) {
u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
@@ -467,8 +598,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
}
}
- if (iscsi_host_add(shost,
- ep ? ib_conn->device->ib_device->dma_device : NULL))
+ if (iscsi_host_add(shost, ep ?
+ ib_conn->device->ib_device->dma_device : NULL))
goto free_host;
if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
@@ -540,6 +671,13 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
return 0;
}
+/**
+ * iscsi_iser_conn_get_stats() - get iscsi connection statistics
+ * @cls_conn: iscsi class connection
+ * @stats: iscsi stats to output
+ *
+ * Output connection statistics.
+ */
static void
iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
{
@@ -568,18 +706,18 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
enum iscsi_param param, char *buf)
{
- struct iser_conn *ib_conn = ep->dd_data;
+ struct iser_conn *iser_conn = ep->dd_data;
int len;
switch (param) {
case ISCSI_PARAM_CONN_PORT:
case ISCSI_PARAM_CONN_ADDRESS:
- if (!ib_conn || !ib_conn->cma_id)
+ if (!iser_conn || !iser_conn->ib_conn.cma_id)
return -ENOTCONN;
return iscsi_conn_get_addr_param((struct sockaddr_storage *)
- &ib_conn->cma_id->route.addr.dst_addr,
- param, buf);
+ &iser_conn->ib_conn.cma_id->route.addr.dst_addr,
+ param, buf);
break;
default:
return -ENOSYS;
@@ -588,48 +726,85 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
return len;
}
+/**
+ * iscsi_iser_ep_connect() - Initiate iSER connection establishment
+ * @shost: scsi_host
+ * @dst_addr: destination address
+ * @non_blocking: indicate if routine may block
+ *
+ * Allocate an iscsi endpoint, an iser_conn structure and bind them.
+ * After that start RDMA connection establishment via rdma_cm. We
+ * don't allocate iser_conn embedded in iscsi_endpoint since in teardown
+ * the endpoint will be destroyed at ep_disconnect while iser_conn will
+ * clean up its resources asynchronously.
+ *
+ * Return: iscsi_endpoint created by the iscsi layer or ERR_PTR(error)
+ * on failure.
+ */
static struct iscsi_endpoint *
iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
int non_blocking)
{
int err;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
struct iscsi_endpoint *ep;
- ep = iscsi_create_endpoint(sizeof(*ib_conn));
+ ep = iscsi_create_endpoint(0);
if (!ep)
return ERR_PTR(-ENOMEM);
- ib_conn = ep->dd_data;
- ib_conn->ep = ep;
- iser_conn_init(ib_conn);
+ iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL);
+ if (!iser_conn) {
+ err = -ENOMEM;
+ goto failure;
+ }
+
+ ep->dd_data = iser_conn;
+ iser_conn->ep = ep;
+ iser_conn_init(iser_conn);
- err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr,
- non_blocking);
+ err = iser_connect(iser_conn, NULL, dst_addr, non_blocking);
if (err)
- return ERR_PTR(err);
+ goto failure;
return ep;
+failure:
+ iscsi_destroy_endpoint(ep);
+ return ERR_PTR(err);
}
+/**
+ * iscsi_iser_ep_poll() - poll for iser connection establishment to complete
+ * @ep: iscsi endpoint (created at ep_connect)
+ * @timeout_ms: polling timeout allowed in ms.
+ *
+ * This routine boils down to waiting for up_completion to signal
+ * that the cma_id got a CONNECTED event.
+ *
+ * Return: 1 if connection establishment succeeded, 0 if the timeout
+ * expired (libiscsi retry will kick in) or -1 if interrupted by a
+ * signal or, more likely, if the iser connection state transitioned
+ * to TERMINATING or DOWN during the wait period.
+ */
static int
iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
int rc;
- ib_conn = ep->dd_data;
- rc = wait_event_interruptible_timeout(ib_conn->wait,
- ib_conn->state == ISER_CONN_UP,
- msecs_to_jiffies(timeout_ms));
-
+ iser_conn = ep->dd_data;
+ rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion,
+ msecs_to_jiffies(timeout_ms));
/* if conn establishment failed, return error code to iscsi */
- if (!rc &&
- (ib_conn->state == ISER_CONN_TERMINATING ||
- ib_conn->state == ISER_CONN_DOWN))
- rc = -1;
+ if (rc == 0) {
+ mutex_lock(&iser_conn->state_mutex);
+ if (iser_conn->state == ISER_CONN_TERMINATING ||
+ iser_conn->state == ISER_CONN_DOWN)
+ rc = -1;
+ mutex_unlock(&iser_conn->state_mutex);
+ }
- iser_info("ib conn %p rc = %d\n", ib_conn, rc);
+ iser_info("ib conn %p rc = %d\n", iser_conn, rc);
if (rc > 0)
return 1; /* success, this is the equivalent of POLLOUT */
@@ -639,26 +814,43 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
return rc; /* signal */
}
+/**
+ * iscsi_iser_ep_disconnect() - Initiate connection teardown process
+ * @ep: iscsi endpoint handle
+ *
+ * This routine does not block on iser and RDMA termination completion;
+ * we queue deferred work for iser/RDMA destruction and cleanup, or
+ * actually call it immediately in case we never passed the iscsi conn
+ * bind/start stage, thus it is safe.
+ */
static void
iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
+
+ iser_conn = ep->dd_data;
+ iser_info("ep %p iser conn %p state %d\n",
+ ep, iser_conn, iser_conn->state);
- ib_conn = ep->dd_data;
- iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
- iser_conn_terminate(ib_conn);
+ mutex_lock(&iser_conn->state_mutex);
+ iser_conn_terminate(iser_conn);
/*
- * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop
- * call and ISER_CONN_DOWN state before freeing the iser resources.
- * otherwise we are safe to free resources immediately.
+ * if iser_conn and iscsi_conn are bound, we must wait for
+ * iscsi_conn_stop and flush errors completion before freeing
+ * the iser resources. Otherwise we are safe to free resources
+ * immediately.
*/
- if (ib_conn->iscsi_conn) {
- INIT_WORK(&ib_conn->release_work, iser_release_work);
- queue_work(release_wq, &ib_conn->release_work);
+ if (iser_conn->iscsi_conn) {
+ INIT_WORK(&iser_conn->release_work, iser_release_work);
+ queue_work(release_wq, &iser_conn->release_work);
+ mutex_unlock(&iser_conn->state_mutex);
} else {
- iser_conn_release(ib_conn);
+ iser_conn->state = ISER_CONN_DOWN;
+ mutex_unlock(&iser_conn->state_mutex);
+ iser_conn_release(iser_conn);
}
+ iscsi_destroy_endpoint(ep);
}
static umode_t iser_attr_is_visible(int param_type, int param)
@@ -819,7 +1011,7 @@ register_transport_failure:
static void __exit iser_exit(void)
{
- struct iser_conn *ib_conn, *n;
+ struct iser_conn *iser_conn, *n;
int connlist_empty;
iser_dbg("Removing iSER datamover...\n");
@@ -832,8 +1024,9 @@ static void __exit iser_exit(void)
if (!connlist_empty) {
iser_err("Error cleanup stage completed but we still have iser "
"connections, destroying them anyway.\n");
- list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) {
- iser_conn_release(ib_conn);
+ list_for_each_entry_safe(iser_conn, n, &ig.connlist,
+ conn_list) {
+ iser_conn_release(iser_conn);
}
}
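
The conversion of iscsi_iser_ep_poll() from
wait_event_interruptible_timeout() to
wait_for_completion_interruptible_timeout() keeps the same three-way
contract: a positive return (time left) means connected, zero means
timeout and a negative value means a signal arrived. A sketch of that
mapping, with a hypothetical helper:

static int poll_up_completion(struct completion *up, int timeout_ms)
{
	long rc;

	rc = wait_for_completion_interruptible_timeout(up,
			msecs_to_jiffies(timeout_ms));
	if (rc > 0)
		return 1;	/* connected, equivalent of POLLOUT */
	if (rc == 0)
		return 0;	/* timeout; libiscsi retry kicks in */
	return -1;		/* interrupted by a signal */
}
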
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 97cd385bf7f7..cd4174ca9a76 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,39 +69,38 @@
#define DRV_NAME "iser"
#define PFX DRV_NAME ": "
-#define DRV_VER "1.4"
+#define DRV_VER "1.4.8"
-#define iser_dbg(fmt, arg...) \
- do { \
- if (iser_debug_level > 2) \
- printk(KERN_DEBUG PFX "%s:" fmt,\
- __func__ , ## arg); \
+#define iser_dbg(fmt, arg...) \
+ do { \
+ if (iser_debug_level > 2) \
+ printk(KERN_DEBUG PFX "%s: " fmt,\
+ __func__ , ## arg); \
} while (0)
#define iser_warn(fmt, arg...) \
do { \
if (iser_debug_level > 0) \
- pr_warn(PFX "%s:" fmt, \
+ pr_warn(PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
#define iser_info(fmt, arg...) \
do { \
if (iser_debug_level > 1) \
- pr_info(PFX "%s:" fmt, \
+ pr_info(PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
#define iser_err(fmt, arg...) \
do { \
- printk(KERN_ERR PFX "%s:" fmt, \
+ printk(KERN_ERR PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
#define SHIFT_4K 12
#define SIZE_4K (1ULL << SHIFT_4K)
#define MASK_4K (~(SIZE_4K-1))
-
/* support up to 512KB in one RDMA */
#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K)
#define ISER_DEF_XMIT_CMDS_DEFAULT 512
@@ -145,18 +144,32 @@
ISER_MAX_TX_MISC_PDUS + \
ISER_MAX_RX_MISC_PDUS)
+#define ISER_WC_BATCH_COUNT 16
+#define ISER_SIGNAL_CMD_COUNT 32
+
#define ISER_VER 0x10
#define ISER_WSV 0x08
#define ISER_RSV 0x04
#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
+#define ISER_BEACON_WRID 0xfffffffffffffffeULL
+/**
+ * struct iser_hdr - iSER header
+ *
+ * @flags: flags support (zbva, remote_inv)
+ * @rsvd: reserved
+ * @write_stag: write rkey
+ * @write_va: write virtual address
+ * @read_stag: read rkey
+ * @read_va: read virtual address
+ */
struct iser_hdr {
u8 flags;
u8 rsvd[3];
- __be32 write_stag; /* write rkey */
+ __be32 write_stag;
__be64 write_va;
- __be32 read_stag; /* read rkey */
+ __be32 read_stag;
__be64 read_va;
} __attribute__((packed));
@@ -179,7 +192,7 @@ struct iser_cm_hdr {
/* Length of an object name string */
#define ISER_OBJECT_NAME_SIZE 64
-enum iser_ib_conn_state {
+enum iser_conn_state {
ISER_CONN_INIT, /* descriptor allocd, no conn */
ISER_CONN_PENDING, /* in the process of being established */
ISER_CONN_UP, /* up and running */
@@ -200,23 +213,42 @@ enum iser_data_dir {
ISER_DIRS_NUM
};
+/**
+ * struct iser_data_buf - iSER data buffer
+ *
+ * @buf: pointer to the sg list
+ * @size: num entries of this sg
+ * @data_len: total buffer byte len
+ * @dma_nents: returned by dma_map_sg
+ * @copy_buf: allocated copy buffer for SGs that are
+ * unaligned for rdma and therefore copied
+ * @sg_single: SG-ified clone of a non-SG SC or an
+ * unaligned SG
+ */
struct iser_data_buf {
- void *buf; /* pointer to the sg list */
- unsigned int size; /* num entries of this sg */
- unsigned long data_len; /* total data len */
- unsigned int dma_nents; /* returned by dma_map_sg */
- char *copy_buf; /* allocated copy buf for SGs unaligned *
- * for rdma which are copied */
- struct scatterlist sg_single; /* SG-ified clone of a non SG SC or *
- * unaligned SG */
+ void *buf;
+ unsigned int size;
+ unsigned long data_len;
+ unsigned int dma_nents;
+ char *copy_buf;
+ struct scatterlist sg_single;
};
/* fwd declarations */
struct iser_device;
-struct iser_cq_desc;
struct iscsi_iser_task;
struct iscsi_endpoint;
+/**
+ * struct iser_mem_reg - iSER memory registration info
+ *
+ * @lkey: MR local key
+ * @rkey: MR remote key
+ * @va: MR start address (buffer va)
+ * @len: MR length
+ * @mem_h: pointer to registration context (FMR/Fastreg)
+ * @is_mr: indicates whether we registered the buffer
+ */
struct iser_mem_reg {
u32 lkey;
u32 rkey;
@@ -226,11 +258,20 @@ struct iser_mem_reg {
int is_mr;
};
+/**
+ * struct iser_regd_buf - iSER buffer registration desc
+ *
+ * @reg: memory registration info
+ * @virt_addr: virtual address of buffer
+ * @device: reference to iser device
+ * @direction: dma direction (for dma_unmap)
+ * @data_size: data buffer size in bytes
+ */
struct iser_regd_buf {
- struct iser_mem_reg reg; /* memory registration info */
+ struct iser_mem_reg reg;
void *virt_addr;
- struct iser_device *device; /* device->device for dma_unmap */
- enum dma_data_direction direction; /* direction for dma_unmap */
+ struct iser_device *device;
+ enum dma_data_direction direction;
unsigned int data_size;
};
@@ -240,19 +281,39 @@ enum iser_desc_type {
ISCSI_TX_DATAOUT
};
+/**
+ * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ *
+ * @iser_header: iser header
+ * @iscsi_header: iscsi header
+ * @type: command/control/dataout
+ * @dma_addr: header buffer dma address
+ * @tx_sg: sg[0] points to iser/iscsi headers
+ * sg[1] optionally points to either of immediate data
+ * unsolicited data-out or control
+ * @num_sge: number sges used on this TX task
+ */
struct iser_tx_desc {
struct iser_hdr iser_header;
struct iscsi_hdr iscsi_header;
enum iser_desc_type type;
u64 dma_addr;
- /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either
- of immediate data, unsolicited data-out or control (login,text) */
struct ib_sge tx_sg[2];
int num_sge;
};
#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
sizeof(u64) + sizeof(struct ib_sge)))
+/**
+ * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ *
+ * @iser_header: iser header
+ * @iscsi_header: iscsi header
+ * @data: received data segment
+ * @dma_addr: receive buffer dma address
+ * @rx_sg: ib_sge of receive buffer
+ * @pad: for sense data (TODO: modify to the maximum sense length supported)
+ */
struct iser_rx_desc {
struct iser_hdr iser_header;
struct iscsi_hdr iscsi_header;
@@ -265,25 +326,59 @@ struct iser_rx_desc {
#define ISER_MAX_CQ 4
struct iser_conn;
+struct ib_conn;
struct iscsi_iser_task;
+/**
+ * struct iser_comp - iSER completion context
+ *
+ * @device: pointer to device handle
+ * @cq: completion queue
+ * @wcs: work completion array
+ * @tasklet: Tasklet handle
+ * @active_qps: Number of active QPs attached
+ * to completion context
+ */
+struct iser_comp {
+ struct iser_device *device;
+ struct ib_cq *cq;
+ struct ib_wc wcs[ISER_WC_BATCH_COUNT];
+ struct tasklet_struct tasklet;
+ int active_qps;
+};
+
+/**
+ * struct iser_device - iSER device handle
+ *
+ * @ib_device: RDMA device
+ * @pd: Protection Domain for this device
+ * @dev_attr: Device attributes container
+ * @mr: Global DMA memory region
+ * @event_handler: IB events handle routine
+ * @ig_list: entry in devices list
+ * @refcount: Reference counter, dominated by open iser connections
+ * @comps_used: Number of completion contexts used, the minimum of
+ * online cpus and the device's max completion vectors
+ * @comps: Dynamically allocated array of completion handlers
+ * Memory registration pool function pointers (FMR or Fastreg):
+ * @iser_alloc_rdma_reg_res: Allocation of memory regions pool
+ * @iser_free_rdma_reg_res: Free of memory regions pool
+ * @iser_reg_rdma_mem: Memory registration routine
+ * @iser_unreg_rdma_mem: Memory deregistration routine
+ */
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
struct ib_device_attr dev_attr;
- struct ib_cq *rx_cq[ISER_MAX_CQ];
- struct ib_cq *tx_cq[ISER_MAX_CQ];
struct ib_mr *mr;
- struct tasklet_struct cq_tasklet[ISER_MAX_CQ];
struct ib_event_handler event_handler;
- struct list_head ig_list; /* entry in ig devices list */
+ struct list_head ig_list;
int refcount;
- int cq_active_qps[ISER_MAX_CQ];
- int cqs_used;
- struct iser_cq_desc *cq_desc;
- int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn,
+ int comps_used;
+ struct iser_comp comps[ISER_MAX_CQ];
+ int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn,
unsigned cmds_max);
- void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn);
+ void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn);
int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
@@ -301,76 +396,160 @@ enum iser_reg_indicator {
ISER_FASTREG_PROTECTED = 1 << 3,
};
+/**
+ * struct iser_pi_context - Protection information context
+ *
+ * @prot_mr: protection memory region
+ * @prot_frpl: protection fastreg page list
+ * @sig_mr: signature feature enabled memory region
+ */
struct iser_pi_context {
struct ib_mr *prot_mr;
struct ib_fast_reg_page_list *prot_frpl;
struct ib_mr *sig_mr;
};
+/**
+ * struct fast_reg_descriptor - Fast registration descriptor
+ *
+ * @list: entry in connection fastreg pool
+ * @data_mr: data memory region
+ * @data_frpl: data fastreg page list
+ * @pi_ctx: protection information context
+ * @reg_indicators: fast registration indicators
+ */
struct fast_reg_descriptor {
struct list_head list;
- /* For fast registration - FRWR */
struct ib_mr *data_mr;
struct ib_fast_reg_page_list *data_frpl;
struct iser_pi_context *pi_ctx;
- /* registration indicators container */
u8 reg_indicators;
};
+/**
+ * struct ib_conn - Infiniband related objects
+ *
+ * @cma_id: rdma_cm connection manager handle
+ * @qp: Connection Queue-pair
+ * @post_recv_buf_count: post receive counter
+ * @rx_wr: receive work request for batch posts
+ * @device: reference to iser device
+ * @comp: iser completion context
+ * @pi_support: Indicate device T10-PI support
+ * @beacon: beacon send wr to signal all flush errors were drained
+ * @flush_comp: completes when all connection completions consumed
+ * @lock: protects fmr/fastreg pool
+ * @union.fmr:
+ * @pool: FMR pool for fast registrations
+ * @page_vec: page vector to hold mapped commands pages
+ * used for registration
+ * @union.fastreg:
+ * @pool: Fast registration descriptors pool for fast
+ * registrations
+ * @pool_size: Size of pool
+ */
+struct ib_conn {
+ struct rdma_cm_id *cma_id;
+ struct ib_qp *qp;
+ int post_recv_buf_count;
+ struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
+ struct iser_device *device;
+ struct iser_comp *comp;
+ bool pi_support;
+ struct ib_send_wr beacon;
+ struct completion flush_comp;
+ spinlock_t lock;
+ union {
+ struct {
+ struct ib_fmr_pool *pool;
+ struct iser_page_vec *page_vec;
+ } fmr;
+ struct {
+ struct list_head pool;
+ int pool_size;
+ } fastreg;
+ };
+};
+
+/**
+ * struct iser_conn - iSER connection context
+ *
+ * @ib_conn: connection RDMA resources
+ * @iscsi_conn: link to matching iscsi connection
+ * @ep: transport handle
+ * @state: connection logical state
+ * @qp_max_recv_dtos: maximum number of data outs, corresponds
+ * to max number of post recvs
+ * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1)
+ * @min_posted_rx: (qp_max_recv_dtos >> 2)
+ * @name: connection peer portal
+ * @release_work: deferred work for release job
+ * @state_mutex: protects iser connection state
+ * @stop_completion: conn_stop completion
+ * @ib_completion: RDMA cleanup completion
+ * @up_completion: connection establishment completed
+ * (state is ISER_CONN_UP)
+ * @conn_list: entry in ig conn list
+ * @login_buf: login data buffer (stores login parameters)
+ * @login_req_buf: login request buffer
+ * @login_req_dma: login request buffer dma address
+ * @login_resp_buf: login response buffer
+ * @login_resp_dma: login response buffer dma address
+ * @rx_desc_head: head of rx_descs cyclic buffer
+ * @rx_descs: rx buffers array (cyclic buffer)
+ * @num_rx_descs: number of rx descriptors
+ */
struct iser_conn {
+ struct ib_conn ib_conn;
struct iscsi_conn *iscsi_conn;
struct iscsi_endpoint *ep;
- enum iser_ib_conn_state state; /* rdma connection state */
- atomic_t refcount;
- spinlock_t lock; /* used for state changes */
- struct iser_device *device; /* device context */
- struct rdma_cm_id *cma_id; /* CMA ID */
- struct ib_qp *qp; /* QP */
- wait_queue_head_t wait; /* waitq for conn/disconn */
- unsigned qp_max_recv_dtos; /* num of rx buffers */
- unsigned qp_max_recv_dtos_mask; /* above minus 1 */
- unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */
- int post_recv_buf_count; /* posted rx count */
- atomic_t post_send_buf_count; /* posted tx count */
+ enum iser_conn_state state;
+ unsigned qp_max_recv_dtos;
+ unsigned qp_max_recv_dtos_mask;
+ unsigned min_posted_rx;
char name[ISER_OBJECT_NAME_SIZE];
struct work_struct release_work;
+ struct mutex state_mutex;
struct completion stop_completion;
- struct list_head conn_list; /* entry in ig conn list */
+ struct completion ib_completion;
+ struct completion up_completion;
+ struct list_head conn_list;
char *login_buf;
char *login_req_buf, *login_resp_buf;
u64 login_req_dma, login_resp_dma;
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
- struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
- bool pi_support;
-
- /* Connection memory registration pool */
- union {
- struct {
- struct ib_fmr_pool *pool; /* pool of IB FMRs */
- struct iser_page_vec *page_vec; /* represents SG to fmr maps*
- * maps serialized as tx is*/
- } fmr;
- struct {
- struct list_head pool;
- int pool_size;
- } fastreg;
- };
+ u32 num_rx_descs;
};
+/**
+ * struct iscsi_iser_task - iser task context
+ *
+ * @desc: TX descriptor
+ * @iser_conn: link to iser connection
+ * @status: current task status
+ * @sc: link to scsi command
+ * @command_sent: indicate if command was sent
+ * @dir: iser data direction
+ * @rdma_regd: task rdma registration desc
+ * @data: iser data buffer desc
+ * @data_copy: iser data copy buffer desc (bounce buffer)
+ * @prot: iser protection buffer desc
+ * @prot_copy: iser protection copy buffer desc (bounce buffer)
+ */
struct iscsi_iser_task {
struct iser_tx_desc desc;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
enum iser_task_status status;
struct scsi_cmnd *sc;
- int command_sent; /* set if command sent */
- int dir[ISER_DIRS_NUM]; /* set if dir use*/
- struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
- struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
- struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
- struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */
- struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */
+ int command_sent;
+ int dir[ISER_DIRS_NUM];
+ struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];
+ struct iser_data_buf data[ISER_DIRS_NUM];
+ struct iser_data_buf data_copy[ISER_DIRS_NUM];
+ struct iser_data_buf prot[ISER_DIRS_NUM];
+ struct iser_data_buf prot_copy[ISER_DIRS_NUM];
};
struct iser_page_vec {
@@ -380,17 +559,20 @@ struct iser_page_vec {
int data_size;
};
-struct iser_cq_desc {
- struct iser_device *device;
- int cq_index;
-};
-
+/**
+ * struct iser_global - iSER global context
+ *
+ * @device_list_mutex: protects device_list
+ * @device_list: iser devices global list
+ * @connlist_mutex: protects connlist
+ * @connlist: iser connections global list
+ * @desc_cache: kmem cache for tx dataout
+ */
struct iser_global {
- struct mutex device_list_mutex;/* */
- struct list_head device_list; /* all iSER devices */
+ struct mutex device_list_mutex;
+ struct list_head device_list;
struct mutex connlist_mutex;
- struct list_head connlist; /* all iSER IB connections */
-
+ struct list_head connlist;
struct kmem_cache *desc_cache;
};
@@ -399,9 +581,6 @@ extern int iser_debug_level;
extern bool iser_pi_enable;
extern int iser_pi_guard;
-/* allocate connection resources needed for rdma functionality */
-int iser_conn_set_full_featured_mode(struct iscsi_conn *conn);
-
int iser_send_control(struct iscsi_conn *conn,
struct iscsi_task *task);
@@ -413,29 +592,30 @@ int iser_send_data_out(struct iscsi_conn *conn,
struct iscsi_data *hdr);
void iscsi_iser_recv(struct iscsi_conn *conn,
- struct iscsi_hdr *hdr,
- char *rx_data,
- int rx_data_len);
+ struct iscsi_hdr *hdr,
+ char *rx_data,
+ int rx_data_len);
-void iser_conn_init(struct iser_conn *ib_conn);
+void iser_conn_init(struct iser_conn *iser_conn);
-void iser_conn_release(struct iser_conn *ib_conn);
+void iser_conn_release(struct iser_conn *iser_conn);
-void iser_conn_terminate(struct iser_conn *ib_conn);
+int iser_conn_terminate(struct iser_conn *iser_conn);
void iser_release_work(struct work_struct *work);
void iser_rcv_completion(struct iser_rx_desc *desc,
- unsigned long dto_xfer_len,
- struct iser_conn *ib_conn);
+ unsigned long dto_xfer_len,
+ struct ib_conn *ib_conn);
-void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn);
+void iser_snd_completion(struct iser_tx_desc *desc,
+ struct ib_conn *ib_conn);
void iser_task_rdma_init(struct iscsi_iser_task *task);
void iser_task_rdma_finalize(struct iscsi_iser_task *task);
-void iser_free_rx_descriptors(struct iser_conn *ib_conn);
+void iser_free_rx_descriptors(struct iser_conn *iser_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
@@ -447,38 +627,40 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
-int iser_connect(struct iser_conn *ib_conn,
- struct sockaddr_in *src_addr,
- struct sockaddr_in *dst_addr,
- int non_blocking);
+int iser_connect(struct iser_conn *iser_conn,
+ struct sockaddr *src_addr,
+ struct sockaddr *dst_addr,
+ int non_blocking);
-int iser_reg_page_vec(struct iser_conn *ib_conn,
+int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
- struct iser_mem_reg *mem_reg);
+ struct iser_mem_reg *mem_reg);
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
-int iser_post_recvl(struct iser_conn *ib_conn);
-int iser_post_recvm(struct iser_conn *ib_conn, int count);
-int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc);
+int iser_post_recvl(struct iser_conn *iser_conn);
+int iser_post_recvm(struct iser_conn *iser_conn, int count);
+int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
+ bool signal);
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *data,
- enum iser_data_dir iser_dir,
- enum dma_data_direction dma_dir);
+ struct iser_data_buf *data,
+ enum iser_data_dir iser_dir,
+ enum dma_data_direction dma_dir);
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data);
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
-int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
-void iser_free_fmr_pool(struct iser_conn *ib_conn);
-int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max);
-void iser_free_fastreg_pool(struct iser_conn *ib_conn);
+int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
+ struct iscsi_session *session);
+int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+void iser_free_fmr_pool(struct ib_conn *ib_conn);
+int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+void iser_free_fastreg_pool(struct ib_conn *ib_conn);
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 8d44a4060634..5a489ea63732 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -49,7 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_device *device = iser_task->ib_conn->device;
+ struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -103,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
unsigned int edtl)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_device *device = iser_task->ib_conn->device;
+ struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -160,10 +160,10 @@ iser_prepare_write_cmd(struct iscsi_task *task,
}
/* creates a new tx descriptor and adds header regd buffer */
-static void iser_create_send_desc(struct iser_conn *ib_conn,
+static void iser_create_send_desc(struct iser_conn *iser_conn,
struct iser_tx_desc *tx_desc)
{
- struct iser_device *device = ib_conn->device;
+ struct iser_device *device = iser_conn->ib_conn.device;
ib_dma_sync_single_for_cpu(device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -179,103 +179,108 @@ static void iser_create_send_desc(struct iser_conn *ib_conn,
}
}
-static void iser_free_login_buf(struct iser_conn *ib_conn)
+static void iser_free_login_buf(struct iser_conn *iser_conn)
{
- if (!ib_conn->login_buf)
+ struct iser_device *device = iser_conn->ib_conn.device;
+
+ if (!iser_conn->login_buf)
return;
- if (ib_conn->login_req_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_req_dma,
+ if (iser_conn->login_req_dma)
+ ib_dma_unmap_single(device->ib_device,
+ iser_conn->login_req_dma,
ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (ib_conn->login_resp_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_resp_dma,
+ if (iser_conn->login_resp_dma)
+ ib_dma_unmap_single(device->ib_device,
+ iser_conn->login_resp_dma,
ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->login_buf);
+ kfree(iser_conn->login_buf);
/* make sure we never redo any unmapping */
- ib_conn->login_req_dma = 0;
- ib_conn->login_resp_dma = 0;
- ib_conn->login_buf = NULL;
+ iser_conn->login_req_dma = 0;
+ iser_conn->login_resp_dma = 0;
+ iser_conn->login_buf = NULL;
}
-static int iser_alloc_login_buf(struct iser_conn *ib_conn)
+static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
- struct iser_device *device;
+ struct iser_device *device = iser_conn->ib_conn.device;
int req_err, resp_err;
- BUG_ON(ib_conn->device == NULL);
+ BUG_ON(device == NULL);
- device = ib_conn->device;
-
- ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
+ iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!ib_conn->login_buf)
+ if (!iser_conn->login_buf)
goto out_err;
- ib_conn->login_req_buf = ib_conn->login_buf;
- ib_conn->login_resp_buf = ib_conn->login_buf +
+ iser_conn->login_req_buf = iser_conn->login_buf;
+ iser_conn->login_resp_buf = iser_conn->login_buf +
ISCSI_DEF_MAX_RECV_SEG_LEN;
- ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+ iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
+ iser_conn->login_req_buf,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
- ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+ iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
+ iser_conn->login_resp_buf,
+ ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
req_err = ib_dma_mapping_error(device->ib_device,
- ib_conn->login_req_dma);
+ iser_conn->login_req_dma);
resp_err = ib_dma_mapping_error(device->ib_device,
- ib_conn->login_resp_dma);
+ iser_conn->login_resp_dma);
if (req_err || resp_err) {
if (req_err)
- ib_conn->login_req_dma = 0;
+ iser_conn->login_req_dma = 0;
if (resp_err)
- ib_conn->login_resp_dma = 0;
+ iser_conn->login_resp_dma = 0;
goto free_login_buf;
}
return 0;
free_login_buf:
- iser_free_login_buf(ib_conn);
+ iser_free_login_buf(iser_conn);
out_err:
iser_err("unable to alloc or map login buf\n");
return -ENOMEM;
}
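The resulting login buffer layout, for reference (one kmalloc carved in two,
per the code above):

    /*
     *  login_buf
     *  +------------------------------+---------------------+
     *  |  login_req_buf               |  login_resp_buf     |
     *  |  ISCSI_DEF_MAX_RECV_SEG_LEN  |  ISER_RX_LOGIN_SIZE |
     *  +------------------------------+---------------------+
     *
     * the request half is DMA-mapped TO_DEVICE, the response half
     * FROM_DEVICE, and iser_free_login_buf() undoes both mappings.
     */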
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session)
+int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
+ struct iscsi_session *session)
{
int i, j;
u64 dma_addr;
struct iser_rx_desc *rx_desc;
struct ib_sge *rx_sg;
- struct iser_device *device = ib_conn->device;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
- ib_conn->qp_max_recv_dtos = session->cmds_max;
- ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
- ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2;
+ iser_conn->qp_max_recv_dtos = session->cmds_max;
+ iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
+ iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
goto create_rdma_reg_res_failed;
- if (iser_alloc_login_buf(ib_conn))
+ if (iser_alloc_login_buf(iser_conn))
goto alloc_login_buf_fail;
- ib_conn->rx_descs = kmalloc(session->cmds_max *
+ iser_conn->num_rx_descs = session->cmds_max;
+ iser_conn->rx_descs = kmalloc(iser_conn->num_rx_descs *
sizeof(struct iser_rx_desc), GFP_KERNEL);
- if (!ib_conn->rx_descs)
+ if (!iser_conn->rx_descs)
goto rx_desc_alloc_fail;
- rx_desc = ib_conn->rx_descs;
+ rx_desc = iser_conn->rx_descs;
- for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) {
+ for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(device->ib_device, dma_addr))
@@ -289,18 +294,18 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *s
rx_sg->lkey = device->mr->lkey;
}
- ib_conn->rx_desc_head = 0;
+ iser_conn->rx_desc_head = 0;
return 0;
rx_desc_dma_map_failed:
- rx_desc = ib_conn->rx_descs;
+ rx_desc = iser_conn->rx_descs;
for (j = 0; j < i; j++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->rx_descs);
- ib_conn->rx_descs = NULL;
+ kfree(iser_conn->rx_descs);
+ iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
- iser_free_login_buf(ib_conn);
+ iser_free_login_buf(iser_conn);
alloc_login_buf_fail:
device->iser_free_rdma_reg_res(ib_conn);
create_rdma_reg_res_failed:
@@ -308,33 +313,35 @@ create_rdma_reg_res_failed:
return -ENOMEM;
}
-void iser_free_rx_descriptors(struct iser_conn *ib_conn)
+void iser_free_rx_descriptors(struct iser_conn *iser_conn)
{
int i;
struct iser_rx_desc *rx_desc;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
- if (!ib_conn->rx_descs)
+ if (!iser_conn->rx_descs)
goto free_login_buf;
if (device->iser_free_rdma_reg_res)
device->iser_free_rdma_reg_res(ib_conn);
- rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)
+ rx_desc = iser_conn->rx_descs;
+ for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->rx_descs);
+ kfree(iser_conn->rx_descs);
/* make sure we never redo any unmapping */
- ib_conn->rx_descs = NULL;
+ iser_conn->rx_descs = NULL;
free_login_buf:
- iser_free_login_buf(ib_conn);
+ iser_free_login_buf(iser_conn);
}
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
{
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct iscsi_session *session = conn->session;
iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
@@ -343,34 +350,37 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
return 0;
/*
- * Check that there is one posted recv buffer (for the last login
- * response) and no posted send buffers left - they must have been
- * consumed during previous login phases.
+ * Check that there is one posted recv buffer
+ * (for the last login response).
*/
WARN_ON(ib_conn->post_recv_buf_count != 1);
- WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0);
if (session->discovery_sess) {
iser_info("Discovery session, re-using login RX buffer\n");
return 0;
} else
iser_info("Normal session, posting batch of RX %d buffers\n",
- ib_conn->min_posted_rx);
+ iser_conn->min_posted_rx);
/* Initial post receive buffers */
- if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx))
+ if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx))
return -ENOMEM;
return 0;
}
+static inline bool iser_signal_comp(int sig_count)
+{
+ return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
+}
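A minimal usage sketch for this selective-signaling helper, mirroring
iser_send_command() below (the static counter is the only state involved):

    static unsigned sig_count;

    /*
     * Only every ISER_SIGNAL_CMD_COUNT-th send carries IB_SEND_SIGNALED;
     * its completion lets the HCA retire the unsignaled sends queued
     * before it.
     */
    err = iser_post_send(&iser_conn->ib_conn, tx_desc,
                         iser_signal_comp(++sig_count));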
+
/**
* iser_send_command - send command PDU
*/
int iser_send_command(struct iscsi_conn *conn,
struct iscsi_task *task)
{
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
unsigned long edtl;
int err;
@@ -378,12 +388,13 @@ int iser_send_command(struct iscsi_conn *conn,
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
struct scsi_cmnd *sc = task->sc;
struct iser_tx_desc *tx_desc = &iser_task->desc;
+ static unsigned sig_count;
edtl = ntohl(hdr->data_length);
/* build the tx desc regd header and add it to the tx desc dto */
tx_desc->type = ISCSI_TX_SCSI_COMMAND;
- iser_create_send_desc(ib_conn, tx_desc);
+ iser_create_send_desc(iser_conn, tx_desc);
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
data_buf = &iser_task->data[ISER_DIR_IN];
@@ -423,7 +434,8 @@ int iser_send_command(struct iscsi_conn *conn,
iser_task->status = ISER_TASK_STATUS_STARTED;
- err = iser_post_send(ib_conn, tx_desc);
+ err = iser_post_send(&iser_conn->ib_conn, tx_desc,
+ iser_signal_comp(++sig_count));
if (!err)
return 0;
@@ -439,7 +451,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
struct iscsi_task *task,
struct iscsi_data *hdr)
{
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *tx_desc = NULL;
struct iser_regd_buf *regd_buf;
@@ -488,7 +500,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
itt, buf_offset, data_seg_len);
- err = iser_post_send(ib_conn, tx_desc);
+ err = iser_post_send(&iser_conn->ib_conn, tx_desc, true);
if (!err)
return 0;
@@ -501,7 +513,7 @@ send_data_out_error:
int iser_send_control(struct iscsi_conn *conn,
struct iscsi_task *task)
{
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *mdesc = &iser_task->desc;
unsigned long data_seg_len;
@@ -510,9 +522,9 @@ int iser_send_control(struct iscsi_conn *conn,
/* build the tx desc regd header and add it to the tx desc dto */
mdesc->type = ISCSI_TX_CONTROL;
- iser_create_send_desc(ib_conn, mdesc);
+ iser_create_send_desc(iser_conn, mdesc);
- device = ib_conn->device;
+ device = iser_conn->ib_conn.device;
data_seg_len = ntoh24(task->hdr->dlength);
@@ -524,16 +536,16 @@ int iser_send_control(struct iscsi_conn *conn,
}
ib_dma_sync_single_for_cpu(device->ib_device,
- ib_conn->login_req_dma, task->data_count,
+ iser_conn->login_req_dma, task->data_count,
DMA_TO_DEVICE);
- memcpy(ib_conn->login_req_buf, task->data, task->data_count);
+ memcpy(iser_conn->login_req_buf, task->data, task->data_count);
ib_dma_sync_single_for_device(device->ib_device,
- ib_conn->login_req_dma, task->data_count,
+ iser_conn->login_req_dma, task->data_count,
DMA_TO_DEVICE);
- tx_dsg->addr = ib_conn->login_req_dma;
+ tx_dsg->addr = iser_conn->login_req_dma;
tx_dsg->length = task->data_count;
tx_dsg->lkey = device->mr->lkey;
mdesc->num_sge = 2;
@@ -542,7 +554,7 @@ int iser_send_control(struct iscsi_conn *conn,
if (task == conn->login_task) {
iser_dbg("op %x dsl %lx, posting login rx buffer\n",
task->hdr->opcode, data_seg_len);
- err = iser_post_recvl(ib_conn);
+ err = iser_post_recvl(iser_conn);
if (err)
goto send_control_error;
err = iser_post_rx_bufs(conn, task->hdr);
@@ -550,7 +562,7 @@ int iser_send_control(struct iscsi_conn *conn,
goto send_control_error;
}
- err = iser_post_send(ib_conn, mdesc);
+ err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
if (!err)
return 0;
@@ -564,15 +576,17 @@ send_control_error:
*/
void iser_rcv_completion(struct iser_rx_desc *rx_desc,
unsigned long rx_xfer_len,
- struct iser_conn *ib_conn)
+ struct ib_conn *ib_conn)
{
+ struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+ ib_conn);
struct iscsi_hdr *hdr;
u64 rx_dma;
int rx_buflen, outstanding, count, err;
/* differentiate between login to all other PDUs */
- if ((char *)rx_desc == ib_conn->login_resp_buf) {
- rx_dma = ib_conn->login_resp_dma;
+ if ((char *)rx_desc == iser_conn->login_resp_buf) {
+ rx_dma = iser_conn->login_resp_dma;
rx_buflen = ISER_RX_LOGIN_SIZE;
} else {
rx_dma = rx_desc->dma_addr;
@@ -580,14 +594,14 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
}
ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ rx_buflen, DMA_FROM_DEVICE);
hdr = &rx_desc->iscsi_header;
iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
- iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data,
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
rx_xfer_len - ISER_HEADERS_LEN);
ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
@@ -599,21 +613,21 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
* for the posted rx bufs refcount to become zero handles everything */
ib_conn->post_recv_buf_count--;
- if (rx_dma == ib_conn->login_resp_dma)
+ if (rx_dma == iser_conn->login_resp_dma)
return;
outstanding = ib_conn->post_recv_buf_count;
- if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) {
- count = min(ib_conn->qp_max_recv_dtos - outstanding,
- ib_conn->min_posted_rx);
- err = iser_post_recvm(ib_conn, count);
+ if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
+ count = min(iser_conn->qp_max_recv_dtos - outstanding,
+ iser_conn->min_posted_rx);
+ err = iser_post_recvm(iser_conn, count);
if (err)
iser_err("posting %d rx bufs err %d\n", count, err);
}
}
void iser_snd_completion(struct iser_tx_desc *tx_desc,
- struct iser_conn *ib_conn)
+ struct ib_conn *ib_conn)
{
struct iscsi_task *task;
struct iser_device *device = ib_conn->device;
@@ -625,8 +639,6 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc,
tx_desc = NULL;
}
- atomic_dec(&ib_conn->post_send_buf_count);
-
if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
/* this arithmetic is legal by libiscsi dd_data allocation */
task = (void *) ((long)(void *)tx_desc -
@@ -658,7 +670,7 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
- struct iser_device *device = iser_task->ib_conn->device;
+ struct iser_device *device = iser_task->iser_conn->ib_conn.device;
int is_rdma_data_aligned = 1;
int is_rdma_prot_aligned = 1;
int prot_count = scsi_prot_sg_count(iser_task->sc);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 47acd3ad3a17..6c5ce357fba6 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -49,7 +49,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data_copy,
enum iser_data_dir cmd_dir)
{
- struct ib_device *dev = iser_task->ib_conn->device->ib_device;
+ struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
struct scatterlist *sgl = (struct scatterlist *)data->buf;
struct scatterlist *sg;
char *mem = NULL;
@@ -116,7 +116,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct ib_device *dev;
unsigned long cmd_data_len;
- dev = iser_task->ib_conn->device->ib_device;
+ dev = iser_task->iser_conn->ib_conn.device->ib_device;
ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
(cmd_dir == ISER_DIR_OUT) ?
@@ -322,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
struct ib_device *dev;
iser_task->dir[iser_dir] = 1;
- dev = iser_task->ib_conn->device->ib_device;
+ dev = iser_task->iser_conn->ib_conn.device->ib_device;
data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
if (data->dma_nents == 0) {
@@ -337,7 +337,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
{
struct ib_device *dev;
- dev = iser_task->ib_conn->device->ib_device;
+ dev = iser_task->iser_conn->ib_conn.device->ib_device;
ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
}
@@ -348,7 +348,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir,
int aligned_len)
{
- struct iscsi_conn *iscsi_conn = iser_task->ib_conn->iscsi_conn;
+ struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
iscsi_conn->fmr_unalign_cnt++;
iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
@@ -377,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir];
@@ -432,7 +432,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
ib_conn->fmr.page_vec->offset);
for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
iser_err("page_vec[%d] = 0x%llx\n", i,
- (unsigned long long) ib_conn->fmr.page_vec->pages[i]);
+ (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
}
if (err)
return err;
@@ -440,77 +440,74 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
return 0;
}
-static inline enum ib_t10_dif_type
-scsi2ib_prot_type(unsigned char prot_type)
+static inline void
+iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
+ struct ib_sig_domain *domain)
{
- switch (prot_type) {
- case SCSI_PROT_DIF_TYPE0:
- return IB_T10DIF_NONE;
- case SCSI_PROT_DIF_TYPE1:
- return IB_T10DIF_TYPE1;
- case SCSI_PROT_DIF_TYPE2:
- return IB_T10DIF_TYPE2;
- case SCSI_PROT_DIF_TYPE3:
- return IB_T10DIF_TYPE3;
- default:
- return IB_T10DIF_NONE;
- }
-}
-
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.pi_interval = sc->device->sector_size;
+ domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff;
+ /*
+	 * At the moment we hard-code those, but in the future
+ * we will take them from sc.
+ */
+ domain->sig.dif.apptag_check_mask = 0xffff;
+ domain->sig.dif.app_escape = true;
+ domain->sig.dif.ref_escape = true;
+ if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 ||
+ scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2)
+ domain->sig.dif.ref_remap = true;
+}
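As an illustration (example values, not taken from this patch): for a
512-byte-sector Type 1 protected device this helper leaves the domain as:

    /*
     * domain->sig_type                  == IB_SIG_TYPE_T10_DIF
     * domain->sig.dif.pi_interval       == 512
     * domain->sig.dif.ref_tag           == low 32 bits of the command LBA
     * domain->sig.dif.apptag_check_mask == 0xffff
     * domain->sig.dif.app_escape        == true
     * domain->sig.dif.ref_escape        == true
     * domain->sig.dif.ref_remap         == true   (Type 1/2 only)
     */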
static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
- unsigned char scsi_ptype = scsi_get_prot_type(sc);
-
- sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
- sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
- sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size;
- sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size;
-
switch (scsi_get_prot_op(sc)) {
case SCSI_PROT_WRITE_INSERT:
case SCSI_PROT_READ_STRIP:
- sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
- sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+ sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
+ iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
- 0xffffffff;
break;
case SCSI_PROT_READ_INSERT:
case SCSI_PROT_WRITE_STRIP:
- sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
- sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
- 0xffffffff;
- sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+ iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
+ /*
+		 * At the moment we use this modparam to select the
+		 * memory bg_type; in the future we will take it
+		 * from sc.
+ */
+ sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
+ IB_T10DIF_CRC;
break;
case SCSI_PROT_READ_PASS:
case SCSI_PROT_WRITE_PASS:
- sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
- sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
- 0xffffffff;
- sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+ iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
- 0xffffffff;
+ iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
+ /*
+		 * At the moment we use this modparam to select the
+		 * memory bg_type; in the future we will take it
+		 * from sc.
+ */
+ sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
+ IB_T10DIF_CRC;
break;
default:
iser_err("Unsupported PI operation %d\n",
scsi_get_prot_op(sc));
return -EINVAL;
}
+
return 0;
}
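Summary of the mapping implemented above (mem = DIF in local memory,
wire = DIF on the fabric):

    /*
     * WRITE_INSERT / READ_STRIP : mem NONE, wire DIF (CRC guard)
     * READ_INSERT / WRITE_STRIP : mem DIF,  wire NONE
     * READ_PASS   / WRITE_PASS  : mem DIF,  wire DIF (CRC guard)
     *
     * the memory-domain guard type (CRC vs CSUM) comes from the
     * iser_pi_guard modparam for now.
     */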
-
static int
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
switch (scsi_get_prot_type(sc)) {
case SCSI_PROT_DIF_TYPE0:
- *mask = 0x0;
break;
case SCSI_PROT_DIF_TYPE1:
case SCSI_PROT_DIF_TYPE2:
@@ -533,7 +530,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
struct ib_sge *prot_sge, struct ib_sge *sig_sge)
{
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_pi_context *pi_ctx = desc->pi_ctx;
struct ib_send_wr sig_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
@@ -609,7 +606,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct ib_sge *sge)
{
struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct ib_mr *mr;
@@ -700,7 +697,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir];
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index ea01075f9f9b..67225bb82bb5 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -39,8 +39,12 @@
#include "iscsi_iser.h"
#define ISCSI_ISER_MAX_CONN 8
-#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
-#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_RX_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_TX_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
+ ISCSI_ISER_MAX_CONN)
+
+static int iser_cq_poll_limit = 512;
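The sizing arithmetic behind the new macro: with TX and RX sharing a single
CQ per completion context, the CQ must cover every connection posting its
full RX and TX rings plus one beacon WR each:

    ISER_MAX_CQ_LEN == (ISER_QP_MAX_RECV_DTOS + ISER_QP_MAX_REQ_DTOS + 1)
                        * ISCSI_ISER_MAX_CONN

iser_cq_poll_limit only bounds how many completions a single tasklet run
consumes; see iser_cq_tasklet_fn() below.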
static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
@@ -71,9 +75,8 @@ static void iser_event_handler(struct ib_event_handler *handler,
*/
static int iser_create_device_ib_res(struct iser_device *device)
{
- struct iser_cq_desc *cq_desc;
struct ib_device_attr *dev_attr = &device->dev_attr;
- int ret, i, j;
+ int ret, i;
ret = ib_query_device(device->ib_device, dev_attr);
if (ret) {
@@ -101,47 +104,35 @@ static int iser_create_device_ib_res(struct iser_device *device)
return -1;
}
- device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
+ device->comps_used = min(ISER_MAX_CQ,
+ device->ib_device->num_comp_vectors);
iser_info("using %d CQs, device %s supports %d vectors\n",
- device->cqs_used, device->ib_device->name,
+ device->comps_used, device->ib_device->name,
device->ib_device->num_comp_vectors);
- device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
- GFP_KERNEL);
- if (device->cq_desc == NULL)
- goto cq_desc_err;
- cq_desc = device->cq_desc;
-
device->pd = ib_alloc_pd(device->ib_device);
if (IS_ERR(device->pd))
goto pd_err;
- for (i = 0; i < device->cqs_used; i++) {
- cq_desc[i].device = device;
- cq_desc[i].cq_index = i;
-
- device->rx_cq[i] = ib_create_cq(device->ib_device,
- iser_cq_callback,
- iser_cq_event_callback,
- (void *)&cq_desc[i],
- ISER_MAX_RX_CQ_LEN, i);
- if (IS_ERR(device->rx_cq[i]))
- goto cq_err;
-
- device->tx_cq[i] = ib_create_cq(device->ib_device,
- NULL, iser_cq_event_callback,
- (void *)&cq_desc[i],
- ISER_MAX_TX_CQ_LEN, i);
-
- if (IS_ERR(device->tx_cq[i]))
+ for (i = 0; i < device->comps_used; i++) {
+ struct iser_comp *comp = &device->comps[i];
+
+ comp->device = device;
+ comp->cq = ib_create_cq(device->ib_device,
+ iser_cq_callback,
+ iser_cq_event_callback,
+ (void *)comp,
+ ISER_MAX_CQ_LEN, i);
+ if (IS_ERR(comp->cq)) {
+ comp->cq = NULL;
goto cq_err;
+ }
- if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
+ if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
goto cq_err;
- tasklet_init(&device->cq_tasklet[i],
- iser_cq_tasklet_fn,
- (unsigned long)&cq_desc[i]);
+ tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
+ (unsigned long)comp);
}
device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
@@ -160,19 +151,17 @@ static int iser_create_device_ib_res(struct iser_device *device)
handler_err:
ib_dereg_mr(device->mr);
dma_mr_err:
- for (j = 0; j < device->cqs_used; j++)
- tasklet_kill(&device->cq_tasklet[j]);
+ for (i = 0; i < device->comps_used; i++)
+ tasklet_kill(&device->comps[i].tasklet);
cq_err:
- for (j = 0; j < i; j++) {
- if (device->tx_cq[j])
- ib_destroy_cq(device->tx_cq[j]);
- if (device->rx_cq[j])
- ib_destroy_cq(device->rx_cq[j]);
+ for (i = 0; i < device->comps_used; i++) {
+ struct iser_comp *comp = &device->comps[i];
+
+ if (comp->cq)
+ ib_destroy_cq(comp->cq);
}
ib_dealloc_pd(device->pd);
pd_err:
- kfree(device->cq_desc);
-cq_desc_err:
iser_err("failed to allocate an IB resource\n");
return -1;
}
@@ -186,20 +175,18 @@ static void iser_free_device_ib_res(struct iser_device *device)
int i;
BUG_ON(device->mr == NULL);
- for (i = 0; i < device->cqs_used; i++) {
- tasklet_kill(&device->cq_tasklet[i]);
- (void)ib_destroy_cq(device->tx_cq[i]);
- (void)ib_destroy_cq(device->rx_cq[i]);
- device->tx_cq[i] = NULL;
- device->rx_cq[i] = NULL;
+ for (i = 0; i < device->comps_used; i++) {
+ struct iser_comp *comp = &device->comps[i];
+
+ tasklet_kill(&comp->tasklet);
+ ib_destroy_cq(comp->cq);
+ comp->cq = NULL;
}
(void)ib_unregister_event_handler(&device->event_handler);
(void)ib_dereg_mr(device->mr);
(void)ib_dealloc_pd(device->pd);
- kfree(device->cq_desc);
-
device->mr = NULL;
device->pd = NULL;
}
@@ -209,7 +196,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
*
* returns 0 on success, or errno code on failure
*/
-int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
struct iser_device *device = ib_conn->device;
struct ib_fmr_pool_param params;
@@ -259,7 +246,7 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
/**
* iser_free_fmr_pool - releases the FMR pool and page vec
*/
-void iser_free_fmr_pool(struct iser_conn *ib_conn)
+void iser_free_fmr_pool(struct ib_conn *ib_conn)
{
iser_info("freeing conn %p fmr pool %p\n",
ib_conn, ib_conn->fmr.pool);
@@ -363,10 +350,10 @@ fast_reg_mr_failure:
* for fast registration work requests.
* returns 0 on success, or errno code on failure
*/
-int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
- struct iser_device *device = ib_conn->device;
- struct fast_reg_descriptor *desc;
+ struct iser_device *device = ib_conn->device;
+ struct fast_reg_descriptor *desc;
int i, ret;
INIT_LIST_HEAD(&ib_conn->fastreg.pool);
@@ -402,7 +389,7 @@ err:
/**
* iser_free_fastreg_pool - releases the pool of fast_reg descriptors
*/
-void iser_free_fastreg_pool(struct iser_conn *ib_conn)
+void iser_free_fastreg_pool(struct ib_conn *ib_conn)
{
struct fast_reg_descriptor *desc, *tmp;
int i = 0;
@@ -436,7 +423,7 @@ void iser_free_fastreg_pool(struct iser_conn *ib_conn)
*
* returns 0 on success, -1 on failure
*/
-static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
struct iser_device *device;
struct ib_qp_init_attr init_attr;
@@ -451,28 +438,30 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
mutex_lock(&ig.connlist_mutex);
/* select the CQ with the minimal number of usages */
- for (index = 0; index < device->cqs_used; index++)
- if (device->cq_active_qps[index] <
- device->cq_active_qps[min_index])
+ for (index = 0; index < device->comps_used; index++) {
+ if (device->comps[index].active_qps <
+ device->comps[min_index].active_qps)
min_index = index;
- device->cq_active_qps[min_index]++;
+ }
+ ib_conn->comp = &device->comps[min_index];
+ ib_conn->comp->active_qps++;
mutex_unlock(&ig.connlist_mutex);
iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);
init_attr.event_handler = iser_qp_event_callback;
init_attr.qp_context = (void *)ib_conn;
- init_attr.send_cq = device->tx_cq[min_index];
- init_attr.recv_cq = device->rx_cq[min_index];
+ init_attr.send_cq = ib_conn->comp->cq;
+ init_attr.recv_cq = ib_conn->comp->cq;
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
init_attr.cap.max_send_sge = 2;
init_attr.cap.max_recv_sge = 1;
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr.qp_type = IB_QPT_RC;
if (ib_conn->pi_support) {
- init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS;
+ init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
} else {
- init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
+ init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
}
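Note that the "+ 1" added to max_send_wr in both branches reserves a send
queue slot for the beacon WR that iser_conn_terminate() posts at teardown
(see below), so the beacon post cannot fail for lack of queue space.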
ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
@@ -491,33 +480,6 @@ out_err:
}
/**
- * releases the QP objects, returns 0 on success,
- * -1 on failure
- */
-static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
-{
- int cq_index;
- BUG_ON(ib_conn == NULL);
-
- iser_info("freeing conn %p cma_id %p qp %p\n",
- ib_conn, ib_conn->cma_id,
- ib_conn->qp);
-
- /* qp is created only once both addr & route are resolved */
-
- if (ib_conn->qp != NULL) {
- cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
- ib_conn->device->cq_active_qps[cq_index]--;
-
- rdma_destroy_qp(ib_conn->cma_id);
- }
-
- ib_conn->qp = NULL;
-
- return 0;
-}
-
-/**
* based on the resolved device node GUID see if there already allocated
* device for this device. If there's no such, create one.
*/
@@ -568,96 +530,174 @@ static void iser_device_try_release(struct iser_device *device)
mutex_unlock(&ig.device_list_mutex);
}
-static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
- enum iser_ib_conn_state comp,
- enum iser_ib_conn_state exch)
+/**
+ * Called with state mutex held
+ */
+static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
+ enum iser_conn_state comp,
+ enum iser_conn_state exch)
{
int ret;
- spin_lock_bh(&ib_conn->lock);
- if ((ret = (ib_conn->state == comp)))
- ib_conn->state = exch;
- spin_unlock_bh(&ib_conn->lock);
+ ret = (iser_conn->state == comp);
+ if (ret)
+ iser_conn->state = exch;
+
return ret;
}
void iser_release_work(struct work_struct *work)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
- ib_conn = container_of(work, struct iser_conn, release_work);
+ iser_conn = container_of(work, struct iser_conn, release_work);
- /* wait for .conn_stop callback */
- wait_for_completion(&ib_conn->stop_completion);
+ /* Wait for conn_stop to complete */
+ wait_for_completion(&iser_conn->stop_completion);
+	/* Wait for IB resources cleanup to complete */
+ wait_for_completion(&iser_conn->ib_completion);
- /* wait for the qp`s post send and post receive buffers to empty */
- wait_event_interruptible(ib_conn->wait,
- ib_conn->state == ISER_CONN_DOWN);
+ mutex_lock(&iser_conn->state_mutex);
+ iser_conn->state = ISER_CONN_DOWN;
+ mutex_unlock(&iser_conn->state_mutex);
- iser_conn_release(ib_conn);
+ iser_conn_release(iser_conn);
}
/**
- * Frees all conn objects and deallocs conn descriptor
+ * iser_free_ib_conn_res - release IB related resources
+ * @iser_conn: iser connection struct
+ * @destroy_device: indicator if we need to try to release
+ *      the iser device (only iscsi shutdown and DEVICE_REMOVAL
+ *      will use this).
+ *
+ * This routine is called with the iser state mutex held
+ * so the cm_id removal is done elsewhere. It is safe to
+ * be invoked multiple times.
*/
-void iser_conn_release(struct iser_conn *ib_conn)
+static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
+ bool destroy_device)
{
- struct iser_device *device = ib_conn->device;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
+
+ iser_info("freeing conn %p cma_id %p qp %p\n",
+ iser_conn, ib_conn->cma_id, ib_conn->qp);
+
+ iser_free_rx_descriptors(iser_conn);
+
+ if (ib_conn->qp != NULL) {
+ ib_conn->comp->active_qps--;
+ rdma_destroy_qp(ib_conn->cma_id);
+ ib_conn->qp = NULL;
+ }
+
+ if (destroy_device && device != NULL) {
+ iser_device_try_release(device);
+ ib_conn->device = NULL;
+ }
+}
- BUG_ON(ib_conn->state == ISER_CONN_UP);
+/**
+ * Frees all conn objects and deallocs conn descriptor
+ */
+void iser_conn_release(struct iser_conn *iser_conn)
+{
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
mutex_lock(&ig.connlist_mutex);
- list_del(&ib_conn->conn_list);
+ list_del(&iser_conn->conn_list);
mutex_unlock(&ig.connlist_mutex);
- iser_free_rx_descriptors(ib_conn);
- iser_free_ib_conn_res(ib_conn);
- ib_conn->device = NULL;
- /* on EVENT_ADDR_ERROR there's no device yet for this conn */
- if (device != NULL)
- iser_device_try_release(device);
- /* if cma handler context, the caller actually destroy the id */
+
+ mutex_lock(&iser_conn->state_mutex);
+ if (iser_conn->state != ISER_CONN_DOWN)
+ iser_warn("iser conn %p state %d, expected state down.\n",
+ iser_conn, iser_conn->state);
+ /*
+ * In case we never got to bind stage, we still need to
+ * release IB resources (which is safe to call more than once).
+ */
+ iser_free_ib_conn_res(iser_conn, true);
+ mutex_unlock(&iser_conn->state_mutex);
+
if (ib_conn->cma_id != NULL) {
rdma_destroy_id(ib_conn->cma_id);
ib_conn->cma_id = NULL;
}
- iscsi_destroy_endpoint(ib_conn->ep);
+
+ kfree(iser_conn);
}
/**
* triggers start of the disconnect procedures and wait for them to be done
+ * Called with state mutex held
*/
-void iser_conn_terminate(struct iser_conn *ib_conn)
+int iser_conn_terminate(struct iser_conn *iser_conn)
{
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+ struct ib_send_wr *bad_wr;
int err = 0;
- /* change the ib conn state only if the conn is UP, however always call
- * rdma_disconnect since this is the only way to cause the CMA to change
- * the QP state to ERROR
+ /* terminate the iser conn only if the conn state is UP */
+ if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
+ ISER_CONN_TERMINATING))
+ return 0;
+
+ iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);
+
+ /* suspend queuing of new iscsi commands */
+ if (iser_conn->iscsi_conn)
+ iscsi_suspend_queue(iser_conn->iscsi_conn);
+
+ /*
+ * In case we didn't already clean up the cma_id (peer initiated
+	 * a disconnection), we need to cause the CMA to change the QP
+ * state to ERROR.
*/
+ if (ib_conn->cma_id) {
+ err = rdma_disconnect(ib_conn->cma_id);
+ if (err)
+ iser_err("Failed to disconnect, conn: 0x%p err %d\n",
+ iser_conn, err);
+
+ /* post an indication that all flush errors were consumed */
+ err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+ if (err)
+ iser_err("conn %p failed to post beacon", ib_conn);
+
+ wait_for_completion(&ib_conn->flush_comp);
+ }
- iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
- err = rdma_disconnect(ib_conn->cma_id);
- if (err)
- iser_err("Failed to disconnect, conn: 0x%p err %d\n",
- ib_conn,err);
+ return 1;
}
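For reference, the drain pattern used here: once rdma_disconnect() drives the
QP to the error state, posted WRs complete in order with flush status, so the
sentinel ("beacon") send posted last is guaranteed to be the final flush
completion. iser_handle_wc() completes flush_comp when it polls
ISER_BEACON_WRID, at which point all earlier flushes have been consumed.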
+/**
+ * Called with state mutex held
+ */
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
- struct iser_conn *ib_conn;
-
- ib_conn = (struct iser_conn *)cma_id->context;
+ struct iser_conn *iser_conn;
- ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
+ iser_conn = (struct iser_conn *)cma_id->context;
+ iser_conn->state = ISER_CONN_DOWN;
}
+/**
+ * Called with state mutex held
+ */
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
struct iser_device *device;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
+ struct ib_conn *ib_conn;
int ret;
+ iser_conn = (struct iser_conn *)cma_id->context;
+ if (iser_conn->state != ISER_CONN_PENDING)
+ /* bailout */
+ return;
+
+ ib_conn = &iser_conn->ib_conn;
device = iser_device_find_by_ib_device(cma_id);
if (!device) {
iser_err("device lookup/creation failed\n");
@@ -665,7 +705,6 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
return;
}
- ib_conn = (struct iser_conn *)cma_id->context;
ib_conn->device = device;
/* connection T10-PI support */
@@ -689,18 +728,28 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
}
}
+/**
+ * Called with state mutex held
+ */
static void iser_route_handler(struct rdma_cm_id *cma_id)
{
struct rdma_conn_param conn_param;
int ret;
struct iser_cm_hdr req_hdr;
+ struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
- ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
+ if (iser_conn->state != ISER_CONN_PENDING)
+ /* bailout */
+ return;
+
+ ret = iser_create_ib_conn_res(ib_conn);
if (ret)
goto failure;
memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = 4;
+ conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
conn_param.initiator_depth = 1;
conn_param.retry_count = 7;
conn_param.rnr_retry_count = 6;
@@ -724,47 +773,60 @@ failure:
static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
struct ib_qp_attr attr;
struct ib_qp_init_attr init_attr;
+ iser_conn = (struct iser_conn *)cma_id->context;
+ if (iser_conn->state != ISER_CONN_PENDING)
+ /* bailout */
+ return;
+
(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
- ib_conn = (struct iser_conn *)cma_id->context;
- if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP))
- wake_up_interruptible(&ib_conn->wait);
+ iser_conn->state = ISER_CONN_UP;
+ complete(&iser_conn->up_completion);
}
static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
- ib_conn = (struct iser_conn *)cma_id->context;
-
- /* getting here when the state is UP means that the conn is being *
- * terminated asynchronously from the iSCSI layer's perspective. */
- if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
- ISER_CONN_TERMINATING)){
- if (ib_conn->iscsi_conn)
- iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED);
+ if (iser_conn_terminate(iser_conn)) {
+ if (iser_conn->iscsi_conn)
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
else
iser_err("iscsi_iser connection isn't bound\n");
}
-
- /* Complete the termination process if no posts are pending */
- if (ib_conn->post_recv_buf_count == 0 &&
- (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
- ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
- }
}
+static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
+ bool destroy_device)
+{
+ struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
+
+ /*
+	 * We are not guaranteed that we visited disconnected_handler
+	 * by now, so call it here to make sure we handle the CM DREP
+	 * and flush errors.
+ */
+ iser_disconnected_handler(cma_id);
+ iser_free_ib_conn_res(iser_conn, destroy_device);
+ complete(&iser_conn->ib_completion);
+}
+
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
+ struct iser_conn *iser_conn;
+ int ret = 0;
+
+ iser_conn = (struct iser_conn *)cma_id->context;
iser_info("event %d status %d conn %p id %p\n",
event->event, event->status, cma_id->context, cma_id);
+ mutex_lock(&iser_conn->state_mutex);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
iser_addr_handler(cma_id);
@@ -783,89 +845,107 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
iser_connect_error(cma_id);
break;
case RDMA_CM_EVENT_DISCONNECTED:
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_ADDR_CHANGE:
iser_disconnected_handler(cma_id);
break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ /*
+ * we *must* destroy the device as we cannot rely
+ * on iscsid to be around to initiate error handling.
+ * also implicitly destroy the cma_id.
+ */
+ iser_cleanup_handler(cma_id, true);
+ iser_conn->ib_conn.cma_id = NULL;
+ ret = 1;
+ break;
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ iser_cleanup_handler(cma_id, false);
+ break;
default:
iser_err("Unexpected RDMA CM event (%d)\n", event->event);
break;
}
- return 0;
+ mutex_unlock(&iser_conn->state_mutex);
+
+ return ret;
}
-void iser_conn_init(struct iser_conn *ib_conn)
+void iser_conn_init(struct iser_conn *iser_conn)
{
- ib_conn->state = ISER_CONN_INIT;
- init_waitqueue_head(&ib_conn->wait);
- ib_conn->post_recv_buf_count = 0;
- atomic_set(&ib_conn->post_send_buf_count, 0);
- init_completion(&ib_conn->stop_completion);
- INIT_LIST_HEAD(&ib_conn->conn_list);
- spin_lock_init(&ib_conn->lock);
+ iser_conn->state = ISER_CONN_INIT;
+ iser_conn->ib_conn.post_recv_buf_count = 0;
+ init_completion(&iser_conn->ib_conn.flush_comp);
+ init_completion(&iser_conn->stop_completion);
+ init_completion(&iser_conn->ib_completion);
+ init_completion(&iser_conn->up_completion);
+ INIT_LIST_HEAD(&iser_conn->conn_list);
+ spin_lock_init(&iser_conn->ib_conn.lock);
+ mutex_init(&iser_conn->state_mutex);
}
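A hedged sketch of the lifecycle these primitives imply (error handling
elided; the allocation site and the blocking connect are assumptions, as the
iscsi_iser.c caller is outside this hunk, and the real driver defers
iser_conn_release() to release_work):

    iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL);
    iser_conn_init(iser_conn);
    err = iser_connect(iser_conn, NULL, dst_addr, 0);  /* blocks until UP */
    ...
    mutex_lock(&iser_conn->state_mutex);
    iser_conn_terminate(iser_conn);   /* posts beacon, waits for flushes */
    mutex_unlock(&iser_conn->state_mutex);
    iser_conn_release(iser_conn);     /* frees IB resources and the conn */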
/**
* starts the process of connecting to the target
* sleeps until the connection is established or rejected
*/
-int iser_connect(struct iser_conn *ib_conn,
- struct sockaddr_in *src_addr,
- struct sockaddr_in *dst_addr,
+int iser_connect(struct iser_conn *iser_conn,
+ struct sockaddr *src_addr,
+ struct sockaddr *dst_addr,
int non_blocking)
{
- struct sockaddr *src, *dst;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
int err = 0;
- sprintf(ib_conn->name, "%pI4:%d",
- &dst_addr->sin_addr.s_addr, dst_addr->sin_port);
+ mutex_lock(&iser_conn->state_mutex);
+
+ sprintf(iser_conn->name, "%pISp", dst_addr);
+
+ iser_info("connecting to: %s\n", iser_conn->name);
/* the device is known only --after-- address resolution */
ib_conn->device = NULL;
- iser_info("connecting to: %pI4, port 0x%x\n",
- &dst_addr->sin_addr, dst_addr->sin_port);
+ iser_conn->state = ISER_CONN_PENDING;
- ib_conn->state = ISER_CONN_PENDING;
+ ib_conn->beacon.wr_id = ISER_BEACON_WRID;
+ ib_conn->beacon.opcode = IB_WR_SEND;
ib_conn->cma_id = rdma_create_id(iser_cma_handler,
- (void *)ib_conn,
- RDMA_PS_TCP, IB_QPT_RC);
+ (void *)iser_conn,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ib_conn->cma_id)) {
err = PTR_ERR(ib_conn->cma_id);
iser_err("rdma_create_id failed: %d\n", err);
goto id_failure;
}
- src = (struct sockaddr *)src_addr;
- dst = (struct sockaddr *)dst_addr;
- err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
+ err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
if (err) {
iser_err("rdma_resolve_addr failed: %d\n", err);
goto addr_failure;
}
if (!non_blocking) {
- wait_event_interruptible(ib_conn->wait,
- (ib_conn->state != ISER_CONN_PENDING));
+ wait_for_completion_interruptible(&iser_conn->up_completion);
- if (ib_conn->state != ISER_CONN_UP) {
+ if (iser_conn->state != ISER_CONN_UP) {
err = -EIO;
goto connect_failure;
}
}
+ mutex_unlock(&iser_conn->state_mutex);
mutex_lock(&ig.connlist_mutex);
- list_add(&ib_conn->conn_list, &ig.connlist);
+ list_add(&iser_conn->conn_list, &ig.connlist);
mutex_unlock(&ig.connlist_mutex);
return 0;
id_failure:
ib_conn->cma_id = NULL;
addr_failure:
- ib_conn->state = ISER_CONN_DOWN;
+ iser_conn->state = ISER_CONN_DOWN;
connect_failure:
- iser_conn_release(ib_conn);
+ mutex_unlock(&iser_conn->state_mutex);
+ iser_conn_release(iser_conn);
return err;
}
@@ -874,7 +954,7 @@ connect_failure:
*
* returns: 0 on success, errno code on failure
*/
-int iser_reg_page_vec(struct iser_conn *ib_conn,
+int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg)
{
@@ -944,7 +1024,8 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct iser_conn *iser_conn = iser_task->iser_conn;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct fast_reg_descriptor *desc = reg->mem_h;
if (!reg->is_mr)
@@ -957,17 +1038,18 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
spin_unlock_bh(&ib_conn->lock);
}
-int iser_post_recvl(struct iser_conn *ib_conn)
+int iser_post_recvl(struct iser_conn *iser_conn)
{
struct ib_recv_wr rx_wr, *rx_wr_failed;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct ib_sge sge;
int ib_ret;
- sge.addr = ib_conn->login_resp_dma;
+ sge.addr = iser_conn->login_resp_dma;
sge.length = ISER_RX_LOGIN_SIZE;
sge.lkey = ib_conn->device->mr->lkey;
- rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf;
+ rx_wr.wr_id = (unsigned long)iser_conn->login_resp_buf;
rx_wr.sg_list = &sge;
rx_wr.num_sge = 1;
rx_wr.next = NULL;
@@ -981,20 +1063,21 @@ int iser_post_recvl(struct iser_conn *ib_conn)
return ib_ret;
}
-int iser_post_recvm(struct iser_conn *ib_conn, int count)
+int iser_post_recvm(struct iser_conn *iser_conn, int count)
{
struct ib_recv_wr *rx_wr, *rx_wr_failed;
int i, ib_ret;
- unsigned int my_rx_head = ib_conn->rx_desc_head;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+ unsigned int my_rx_head = iser_conn->rx_desc_head;
struct iser_rx_desc *rx_desc;
for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
- rx_desc = &ib_conn->rx_descs[my_rx_head];
+ rx_desc = &iser_conn->rx_descs[my_rx_head];
rx_wr->wr_id = (unsigned long)rx_desc;
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
- my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
+ my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
}
rx_wr--;
@@ -1006,7 +1089,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count -= count;
} else
- ib_conn->rx_desc_head = my_rx_head;
+ iser_conn->rx_desc_head = my_rx_head;
return ib_ret;
}
@@ -1016,138 +1099,166 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
*
* returns 0 on success, -1 on failure
*/
-int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
+int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
+ bool signal)
{
int ib_ret;
struct ib_send_wr send_wr, *send_wr_failed;
ib_dma_sync_single_for_device(ib_conn->device->ib_device,
- tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
+ tx_desc->dma_addr, ISER_HEADERS_LEN,
+ DMA_TO_DEVICE);
send_wr.next = NULL;
send_wr.wr_id = (unsigned long)tx_desc;
send_wr.sg_list = tx_desc->tx_sg;
send_wr.num_sge = tx_desc->num_sge;
send_wr.opcode = IB_WR_SEND;
- send_wr.send_flags = IB_SEND_SIGNALED;
-
- atomic_inc(&ib_conn->post_send_buf_count);
+ send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
- if (ib_ret) {
+ if (ib_ret)
iser_err("ib_post_send failed, ret:%d\n", ib_ret);
- atomic_dec(&ib_conn->post_send_buf_count);
- }
+
return ib_ret;
}
-static void iser_handle_comp_error(struct iser_tx_desc *desc,
- struct iser_conn *ib_conn)
+/**
+ * is_iser_tx_desc - Indicate if the completion wr_id
+ * is a TX descriptor or not.
+ * @iser_conn: iser connection
+ * @wr_id: completion WR identifier
+ *
+ * Since we cannot rely on wc opcode in FLUSH errors
+ * we must work around it by checking if the wr_id address
+ * falls in the iser connection rx_descs buffer. If so
+ * it is an RX descriptor, otherwise it is a TX.
+ */
+static inline bool
+is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
+{
+ void *start = iser_conn->rx_descs;
+ int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
+
+ if (wr_id >= start && wr_id < start + len)
+ return false;
+
+ return true;
+}
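This works because both post paths stash a descriptor pointer in wr_id:
iser_post_recvm() uses rx_desc addresses from inside the rx_descs array,
while iser_post_send() uses tx_desc addresses, so a wr_id falling inside the
array can only belong to the RX ring.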
+
+/**
+ * iser_handle_comp_error() - Handle error completion
+ * @ib_conn: connection RDMA resources
+ * @wc: work completion
+ *
+ * Notes: We may handle a FLUSH error completion, in which case
+ *        we only clean up if the TX type was DATAOUT. For a non-FLUSH
+ *        error completion we also notify the iscsi layer that the
+ *        connection failed (in case we passed the bind stage).
+ */
+static void
+iser_handle_comp_error(struct ib_conn *ib_conn,
+ struct ib_wc *wc)
{
- if (desc && desc->type == ISCSI_TX_DATAOUT)
- kmem_cache_free(ig.desc_cache, desc);
-
- if (ib_conn->post_recv_buf_count == 0 &&
- atomic_read(&ib_conn->post_send_buf_count) == 0) {
- /* getting here when the state is UP means that the conn is *
- * being terminated asynchronously from the iSCSI layer's *
- * perspective. */
- if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
- ISER_CONN_TERMINATING))
- iscsi_conn_failure(ib_conn->iscsi_conn,
+ struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+ ib_conn);
+
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ if (iser_conn->iscsi_conn)
+ iscsi_conn_failure(iser_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED);
- /* no more non completed posts to the QP, complete the
- * termination process w.o worrying on disconnect event */
- ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
+ if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
+ struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
+
+ if (desc->type == ISCSI_TX_DATAOUT)
+ kmem_cache_free(ig.desc_cache, desc);
+ } else {
+ ib_conn->post_recv_buf_count--;
}
}
-static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
+/**
+ * iser_handle_wc - handle a single work completion
+ * @wc: work completion
+ *
+ * Soft-IRQ context; the work completion can be either a
+ * SEND or a RECV, and may complete successfully or with
+ * an error (or a flush error).
+ */
+static void iser_handle_wc(struct ib_wc *wc)
{
- struct ib_cq *cq = device->tx_cq[cq_index];
- struct ib_wc wc;
+ struct ib_conn *ib_conn;
struct iser_tx_desc *tx_desc;
- struct iser_conn *ib_conn;
- int completed_tx = 0;
-
- while (ib_poll_cq(cq, 1, &wc) == 1) {
- tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
- ib_conn = wc.qp->qp_context;
- if (wc.status == IB_WC_SUCCESS) {
- if (wc.opcode == IB_WC_SEND)
- iser_snd_completion(tx_desc, ib_conn);
- else
- iser_err("expected opcode %d got %d\n",
- IB_WC_SEND, wc.opcode);
+ struct iser_rx_desc *rx_desc;
+
+ ib_conn = wc->qp->qp_context;
+ if (wc->status == IB_WC_SUCCESS) {
+ if (wc->opcode == IB_WC_RECV) {
+ rx_desc = (struct iser_rx_desc *)wc->wr_id;
+ iser_rcv_completion(rx_desc, wc->byte_len,
+ ib_conn);
+		} else if (wc->opcode == IB_WC_SEND) {
+ tx_desc = (struct iser_tx_desc *)wc->wr_id;
+ iser_snd_completion(tx_desc, ib_conn);
} else {
- iser_err("tx id %llx status %d vend_err %x\n",
- wc.wr_id, wc.status, wc.vendor_err);
- if (wc.wr_id != ISER_FASTREG_LI_WRID) {
- atomic_dec(&ib_conn->post_send_buf_count);
- iser_handle_comp_error(tx_desc, ib_conn);
- }
+ iser_err("Unknown wc opcode %d\n", wc->opcode);
}
- completed_tx++;
+ } else {
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ iser_err("wr id %llx status %d vend_err %x\n",
+ wc->wr_id, wc->status, wc->vendor_err);
+ else
+ iser_dbg("flush error: wr id %llx\n", wc->wr_id);
+
+ if (wc->wr_id != ISER_FASTREG_LI_WRID &&
+ wc->wr_id != ISER_BEACON_WRID)
+ iser_handle_comp_error(ib_conn, wc);
+
+ /* complete in case all flush errors were consumed */
+ if (wc->wr_id == ISER_BEACON_WRID)
+ complete(&ib_conn->flush_comp);
}
- return completed_tx;
}
-
+/**
+ * iser_cq_tasklet_fn - iSER completion polling loop
+ * @data: iSER completion context
+ *
+ * Soft-IRQ context; polls the connection CQ until either
+ * the CQ is empty or the polling budget is exhausted.
+ */
static void iser_cq_tasklet_fn(unsigned long data)
{
- struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
- struct iser_device *device = cq_desc->device;
- int cq_index = cq_desc->cq_index;
- struct ib_cq *cq = device->rx_cq[cq_index];
- struct ib_wc wc;
- struct iser_rx_desc *desc;
- unsigned long xfer_len;
- struct iser_conn *ib_conn;
- int completed_tx, completed_rx = 0;
-
- /* First do tx drain, so in a case where we have rx flushes and a successful
- * tx completion we will still go through completion error handling.
- */
- completed_tx = iser_drain_tx_cq(device, cq_index);
-
- while (ib_poll_cq(cq, 1, &wc) == 1) {
- desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
- BUG_ON(desc == NULL);
- ib_conn = wc.qp->qp_context;
- if (wc.status == IB_WC_SUCCESS) {
- if (wc.opcode == IB_WC_RECV) {
- xfer_len = (unsigned long)wc.byte_len;
- iser_rcv_completion(desc, xfer_len, ib_conn);
- } else
- iser_err("expected opcode %d got %d\n",
- IB_WC_RECV, wc.opcode);
- } else {
- if (wc.status != IB_WC_WR_FLUSH_ERR)
- iser_err("rx id %llx status %d vend_err %x\n",
- wc.wr_id, wc.status, wc.vendor_err);
- ib_conn->post_recv_buf_count--;
- iser_handle_comp_error(NULL, ib_conn);
- }
- completed_rx++;
- if (!(completed_rx & 63))
- completed_tx += iser_drain_tx_cq(device, cq_index);
+ struct iser_comp *comp = (struct iser_comp *)data;
+ struct ib_cq *cq = comp->cq;
+ struct ib_wc *const wcs = comp->wcs;
+ int i, n, completed = 0;
+
+ while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
+ for (i = 0; i < n; i++)
+ iser_handle_wc(&wcs[i]);
+
+ completed += n;
+ if (completed >= iser_cq_poll_limit)
+ break;
}
- /* #warning "it is assumed here that arming CQ only once its empty" *
- * " would not cause interrupts to be missed" */
+
+ /*
+ * It is assumed here that arming the CQ only once it is empty
+ * would not cause interrupts to be missed.
+ */
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
+ iser_dbg("got %d completions\n", completed);
}
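/*
 * The assumption spelled out above can also be avoided: passing
 * IB_CQ_REPORT_MISSED_EVENTS makes ib_req_notify_cq() return a positive
 * value when completions may have arrived before the CQ was re-armed.
 * A sketch of that race-free variant (not what this patch does):
 */
	if (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				 IB_CQ_REPORT_MISSED_EVENTS) > 0)
		tasklet_schedule(&comp->tasklet);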
static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
- struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
- struct iser_device *device = cq_desc->device;
- int cq_index = cq_desc->cq_index;
+ struct iser_comp *comp = cq_context;
- tasklet_schedule(&device->cq_tasklet[cq_index]);
+ tasklet_schedule(&comp->tasklet);
}
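/*
 * The cq_context-to-iser_comp plumbing above implies that each completion
 * context is wired up roughly like this at CQ creation time (a sketch;
 * iser_cq_event_callback and max_cqe are assumptions, only comp->cq,
 * comp->tasklet and iser_cq_callback appear in this diff):
 */
	comp->cq = ib_create_cq(device->ib_device, iser_cq_callback,
				iser_cq_event_callback, comp, max_cqe, 0);
	tasklet_init(&comp->tasklet, iser_cq_tasklet_fn, (unsigned long)comp);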
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index d4c7928a0f36..3effa931fce2 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -586,17 +586,12 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
init_completion(&isert_conn->conn_wait);
init_completion(&isert_conn->conn_wait_comp_err);
kref_init(&isert_conn->conn_kref);
- kref_get(&isert_conn->conn_kref);
mutex_init(&isert_conn->conn_mutex);
spin_lock_init(&isert_conn->conn_lock);
INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
cma_id->context = isert_conn;
isert_conn->conn_cm_id = cma_id;
- isert_conn->responder_resources = event->param.conn.responder_resources;
- isert_conn->initiator_depth = event->param.conn.initiator_depth;
- pr_debug("Using responder_resources: %u initiator_depth: %u\n",
- isert_conn->responder_resources, isert_conn->initiator_depth);
isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
ISER_RX_LOGIN_SIZE, GFP_KERNEL);
@@ -643,6 +638,12 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
goto out_rsp_dma_map;
}
+ /* Set max inflight RDMA READ requests */
+ isert_conn->initiator_depth = min_t(u8,
+ event->param.conn.initiator_depth,
+ device->dev_attr.max_qp_init_rd_atom);
+ pr_debug("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+
isert_conn->conn_device = device;
isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device);
if (IS_ERR(isert_conn->conn_pd)) {
@@ -746,7 +747,9 @@ isert_connect_release(struct isert_conn *isert_conn)
static void
isert_connected_handler(struct rdma_cm_id *cma_id)
{
- return;
+ struct isert_conn *isert_conn = cma_id->context;
+
+ kref_get(&isert_conn->conn_kref);
}
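/*
 * Together with the isert_connect_request() and isert_disconnect_work()
 * hunks, this rebalances the connection kref: the extra reference is now
 * taken only once the CM reports an established connection, and it is
 * dropped in isert_wait_conn() further below rather than in the
 * disconnect path, so a connection that never reaches the established
 * state is no longer over-referenced.
 */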
static void
@@ -798,7 +801,6 @@ isert_disconnect_work(struct work_struct *work)
wake_up:
complete(&isert_conn->conn_wait);
- isert_put_conn(isert_conn);
}
static void
@@ -2183,7 +2185,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
isert_cmd->tx_desc.num_sge = 2;
}
- isert_init_send_wr(isert_conn, isert_cmd, send_wr, true);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -2607,58 +2609,45 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
return ret;
}
-static inline enum ib_t10_dif_type
-se2ib_prot_type(enum target_prot_type prot_type)
+static inline void
+isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
+ struct ib_sig_domain *domain)
{
- switch (prot_type) {
- case TARGET_DIF_TYPE0_PROT:
- return IB_T10DIF_NONE;
- case TARGET_DIF_TYPE1_PROT:
- return IB_T10DIF_TYPE1;
- case TARGET_DIF_TYPE2_PROT:
- return IB_T10DIF_TYPE2;
- case TARGET_DIF_TYPE3_PROT:
- return IB_T10DIF_TYPE3;
- default:
- return IB_T10DIF_NONE;
- }
-}
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.bg_type = IB_T10DIF_CRC;
+ domain->sig.dif.pi_interval = se_cmd->se_dev->dev_attrib.block_size;
+ domain->sig.dif.ref_tag = se_cmd->reftag_seed;
+ /*
+ * At the moment we hard-code these values, but should the
+ * target core want to control them in the future, we will
+ * take them from se_cmd.
+ */
+ domain->sig.dif.apptag_check_mask = 0xffff;
+ domain->sig.dif.app_escape = true;
+ domain->sig.dif.ref_escape = true;
+ if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT ||
+ se_cmd->prot_type == TARGET_DIF_TYPE2_PROT)
+ domain->sig.dif.ref_remap = true;
+}
static int
isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
{
- enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type);
-
- sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
- sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
- sig_attrs->mem.sig.dif.pi_interval =
- se_cmd->se_dev->dev_attrib.block_size;
- sig_attrs->wire.sig.dif.pi_interval =
- se_cmd->se_dev->dev_attrib.block_size;
-
switch (se_cmd->prot_op) {
case TARGET_PROT_DIN_INSERT:
case TARGET_PROT_DOUT_STRIP:
- sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
- sig_attrs->wire.sig.dif.type = ib_prot_type;
- sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed;
+ sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
+ isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
break;
case TARGET_PROT_DOUT_INSERT:
case TARGET_PROT_DIN_STRIP:
- sig_attrs->mem.sig.dif.type = ib_prot_type;
- sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed;
- sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+ isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
break;
case TARGET_PROT_DIN_PASS:
case TARGET_PROT_DOUT_PASS:
- sig_attrs->mem.sig.dif.type = ib_prot_type;
- sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed;
- sig_attrs->wire.sig.dif.type = ib_prot_type;
- sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed;
+ isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
+ isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
break;
default:
pr_err("Unsupported PI operation %d\n", se_cmd->prot_op);
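/*
 * A minimal sketch of the consumer side: the attributes filled in above
 * are handed to the HCA via a signature-handover work request. The
 * fr_desc/pi_ctx names are assumptions modeled on this driver's fast-reg
 * descriptors, not taken from this hunk:
 */
	struct ib_sig_attrs sig_attrs;
	struct ib_send_wr sig_wr;

	memset(&sig_attrs, 0, sizeof(sig_attrs));
	if (!isert_set_sig_attrs(se_cmd, &sig_attrs)) {
		memset(&sig_wr, 0, sizeof(sig_wr));
		sig_wr.opcode = IB_WR_REG_SIG_MR;
		sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
		sig_wr.wr.sig_handover.sig_mr = fr_desc->pi_ctx->sig_mr;
	}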
@@ -2882,7 +2871,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
&isert_cmd->tx_desc.iscsi_header);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
isert_init_send_wr(isert_conn, isert_cmd,
- &isert_cmd->tx_desc.send_wr, true);
+ &isert_cmd->tx_desc.send_wr, false);
isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
wr->send_wr_num += 1;
}
@@ -3067,7 +3056,6 @@ isert_rdma_accept(struct isert_conn *isert_conn)
int ret;
memset(&cp, 0, sizeof(struct rdma_conn_param));
- cp.responder_resources = isert_conn->responder_resources;
cp.initiator_depth = isert_conn->initiator_depth;
cp.retry_count = 7;
cp.rnr_retry_count = 7;
@@ -3152,7 +3140,7 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
accept_wait:
ret = down_interruptible(&isert_np->np_sem);
- if (max_accept > 5)
+ if (ret || max_accept > 5)
return -ENODEV;
spin_lock_bh(&np->np_thread_lock);
@@ -3215,7 +3203,7 @@ static void isert_wait_conn(struct iscsi_conn *conn)
pr_debug("isert_wait_conn: Starting \n");
mutex_lock(&isert_conn->conn_mutex);
- if (isert_conn->conn_cm_id) {
+ if (isert_conn->conn_cm_id && !isert_conn->disconnect) {
pr_debug("Calling rdma_disconnect from isert_wait_conn\n");
rdma_disconnect(isert_conn->conn_cm_id);
}
@@ -3234,6 +3222,7 @@ static void isert_wait_conn(struct iscsi_conn *conn)
wait_for_completion(&isert_conn->conn_wait_comp_err);
wait_for_completion(&isert_conn->conn_wait);
+ isert_put_conn(isert_conn);
}
static void isert_free_conn(struct iscsi_conn *conn)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index e3c2c5b4297f..62d2a18e1b41 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
+static struct workqueue_struct *srp_remove_wq;
static struct ib_client srp_client = {
.name = "srp",
@@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target)
spin_unlock_irq(&target->lock);
if (changed)
- queue_work(system_long_wq, &target->remove_work);
+ queue_work(srp_remove_wq, &target->remove_work);
return changed;
}
@@ -1643,10 +1644,14 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
SCSI_SENSE_BUFFERSIZE));
}
- if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER))
- scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
- else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
+ if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
+ scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
+ scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
+ scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
srp_free_req(target, req, scmnd,
be32_to_cpu(rsp->req_lim_delta));
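/*
 * A worked example of the sign convention introduced above (values
 * hypothetical): for a 4096-byte READ that the target truncated to 3072
 * bytes, DIUNDER is set with data_in_res_cnt = 1024 and the residual
 * becomes +1024; had the target supplied 5120 bytes, DIOVER would yield
 * -1024. The sign now tells the SCSI midlayer whether data was missing
 * or surplus, where the old code reported overflow as a positive count.
 */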
@@ -3261,9 +3266,10 @@ static void srp_remove_one(struct ib_device *device)
spin_unlock(&host->target_lock);
/*
- * Wait for target port removal tasks.
+ * Wait for tl_err and target port removal tasks.
*/
flush_workqueue(system_long_wq);
+ flush_workqueue(srp_remove_wq);
kfree(host);
}
@@ -3313,16 +3319,22 @@ static int __init srp_init_module(void)
indirect_sg_entries = cmd_sg_entries;
}
+ srp_remove_wq = create_workqueue("srp_remove");
+ if (!srp_remove_wq) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = -ENOMEM;
ib_srp_transport_template =
srp_attach_transport(&ib_srp_transport_functions);
if (!ib_srp_transport_template)
- return -ENOMEM;
+ goto destroy_wq;
ret = class_register(&srp_class);
if (ret) {
pr_err("couldn't register class infiniband_srp\n");
- srp_release_transport(ib_srp_transport_template);
- return ret;
+ goto release_tr;
}
ib_sa_register_client(&srp_sa_client);
@@ -3330,13 +3342,22 @@ static int __init srp_init_module(void)
ret = ib_register_client(&srp_client);
if (ret) {
pr_err("couldn't register IB client\n");
- srp_release_transport(ib_srp_transport_template);
- ib_sa_unregister_client(&srp_sa_client);
- class_unregister(&srp_class);
- return ret;
+ goto unreg_sa;
}
- return 0;
+out:
+ return ret;
+
+unreg_sa:
+ ib_sa_unregister_client(&srp_sa_client);
+ class_unregister(&srp_class);
+
+release_tr:
+ srp_release_transport(ib_srp_transport_template);
+
+destroy_wq:
+ destroy_workqueue(srp_remove_wq);
+ goto out;
}
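/*
 * Note the unwind order above: the error labels run from the most recent
 * allocation back to destroy_wq, and the final "goto out" funnels every
 * failure through the single return statement, so each partially
 * initialized resource is released exactly once, in reverse order of
 * acquisition.
 */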
static void __exit srp_cleanup_module(void)
@@ -3345,6 +3366,7 @@ static void __exit srp_cleanup_module(void)
ib_sa_unregister_client(&srp_sa_client);
class_unregister(&srp_class);
srp_release_transport(ib_srp_transport_template);
+ destroy_workqueue(srp_remove_wq);
}
module_init(srp_init_module);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index fe09f2788b15..7206547c13ce 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -198,6 +198,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
case IB_EVENT_PKEY_CHANGE:
case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
+ case IB_EVENT_GID_CHANGE:
/* Refresh port data asynchronously. */
if (event->element.port_num <= sdev->device->phys_port_cnt) {
sport = &sdev->port[event->element.port_num - 1];
@@ -563,7 +564,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
&reg_req, 0,
srpt_mad_send_handler,
srpt_mad_recv_handler,
- sport);
+ sport, 0);
if (IS_ERR(sport->mad_agent)) {
ret = PTR_ERR(sport->mad_agent);
sport->mad_agent = NULL;
@@ -3573,7 +3574,7 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name)
int ret, rc;
p = name;
- if (strnicmp(p, "0x", 2) == 0)
+ if (strncasecmp(p, "0x", 2) == 0)
p += 2;
ret = -EINVAL;
len = strlen(p);