author | Jiri Kosina <jkosina@suse.cz> | 2014-11-20 16:42:02 +0300
committer | Jiri Kosina <jkosina@suse.cz> | 2014-11-20 16:42:02 +0300
commit | a02001086bbfb4da35d1228bebc2f1b442db455f (patch)
tree | 62ab47936cef06fd08657ca5b6cd1df98c19be57 /drivers/infiniband
parent | eff264efeeb0898408e8c9df72d8a32621035bed (diff)
parent | fc14f9c1272f62c3e8d01300f52467c0d9af50f9 (diff)
download | linux-a02001086bbfb4da35d1228bebc2f1b442db455f.tar.xz
Merge Linus' tree to be able to apply submitted patches to newer code than
current trivial.git base
Diffstat (limited to 'drivers/infiniband')
65 files changed, 3091 insertions, 1523 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 2bc7f5af64f4..f6d29614cb01 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -94,14 +94,14 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 	port_priv = ib_get_agent_port(device, port_num);
 	if (!port_priv) {
-		printk(KERN_ERR SPFX "Unable to find port agent\n");
+		dev_err(&device->dev, "Unable to find port agent\n");
 		return;
 	}
 	agent = port_priv->agent[qpn];
 	ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
 	if (IS_ERR(ah)) {
-		printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n",
+		dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
 			PTR_ERR(ah));
 		return;
 	}
@@ -110,7 +110,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 				      GFP_KERNEL);
 	if (IS_ERR(send_buf)) {
-		printk(KERN_ERR SPFX "ib_create_send_mad error\n");
+		dev_err(&device->dev, "ib_create_send_mad error\n");
 		goto err1;
 	}
@@ -125,7 +125,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 	}
 	if (ib_post_send_mad(send_buf, NULL)) {
-		printk(KERN_ERR SPFX "ib_post_send_mad error\n");
+		dev_err(&device->dev, "ib_post_send_mad error\n");
 		goto err2;
 	}
 	return;
@@ -151,7 +151,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 	/* Create new device info */
 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
 	if (!port_priv) {
-		printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
+		dev_err(&device->dev, "No memory for ib_agent_port_private\n");
 		ret = -ENOMEM;
 		goto error1;
 	}
@@ -161,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 		port_priv->agent[0] = ib_register_mad_agent(device, port_num,
 							    IB_QPT_SMI, NULL, 0,
 							    &agent_send_handler,
-							    NULL, NULL);
+							    NULL, NULL, 0);
 		if (IS_ERR(port_priv->agent[0])) {
 			ret = PTR_ERR(port_priv->agent[0]);
 			goto error2;
@@ -172,7 +172,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 	port_priv->agent[1] = ib_register_mad_agent(device, port_num,
 						    IB_QPT_GSI, NULL, 0,
 						    &agent_send_handler,
-						    NULL, NULL);
+						    NULL, NULL, 0);
 	if (IS_ERR(port_priv->agent[1])) {
 		ret = PTR_ERR(port_priv->agent[1]);
 		goto error3;
@@ -202,7 +202,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
 	port_priv = __ib_get_agent_port(device, port_num);
 	if (port_priv == NULL) {
 		spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
-		printk(KERN_ERR SPFX "Port %d not found\n", port_num);
+		dev_err(&device->dev, "Port %d not found\n", port_num);
 		return -ENODEV;
 	}
 	list_del(&port_priv->port_list);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c3239170d8b7..e28a494e2a3a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3753,7 +3753,7 @@ static void cm_add_one(struct ib_device *ib_device)
 	struct cm_port *port;
 	struct ib_mad_reg_req reg_req = {
 		.mgmt_class = IB_MGMT_CLASS_CM,
-		.mgmt_class_version = IB_CM_CLASS_VERSION
+		.mgmt_class_version = IB_CM_CLASS_VERSION,
 	};
 	struct ib_port_modify port_modify = {
 		.set_port_cap_mask = IB_PORT_CM_SUP
 	};
@@ -3801,7 +3801,8 @@ static void cm_add_one(struct ib_device *ib_device)
 							0,
 							cm_send_handler,
 							cm_recv_handler,
-							port);
+							port,
+							0);
 		if (IS_ERR(port->mad_agent))
 			goto error2;
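The recurring "NULL, NULL, 0" and "port, 0" changes above (and in several files below) all stem from one API change in this merge: ib_register_mad_agent() grew a registration_flags parameter, and every in-kernel caller now passes 0. A minimal sketch of an updated caller, assuming the 3.18-era signature shown in the mad.c hunk further down (the handler and context names are placeholders, not from the patch):

	/* Sketch: kernel agents that keep RMPP in the kernel pass 0 for
	 * registration_flags; user-space RMPP agents pass IB_MAD_USER_RMPP. */
	struct ib_mad_agent *agent;

	agent = ib_register_mad_agent(device, port_num, IB_QPT_GSI,
				      NULL, 0,          /* no reg req, no RMPP */
				      my_send_handler,  /* placeholder handlers */
				      my_recv_handler,
				      my_context,
				      0);               /* new registration_flags arg */
	if (IS_ERR(agent))
		return PTR_ERR(agent);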
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 3d2e489ab732..ff9163dc1596 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -46,6 +46,7 @@
 #include <linux/completion.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/sysctl.h>
 #include <rdma/iw_cm.h>
 #include <rdma/ib_addr.h>
@@ -65,6 +66,20 @@ struct iwcm_work {
 	struct list_head free_list;
 };
+static unsigned int default_backlog = 256;
+
+static struct ctl_table_header *iwcm_ctl_table_hdr;
+static struct ctl_table iwcm_ctl_table[] = {
+	{
+		.procname	= "default_backlog",
+		.data		= &default_backlog,
+		.maxlen		= sizeof(default_backlog),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{ }
+};
+
 /*
  * The following services provide a mechanism for pre-allocating iwcm_work
  * elements. The design pre-allocates them based on the cm_id type:
@@ -425,6 +440,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	if (!backlog)
+		backlog = default_backlog;
+
 	ret = alloc_work_entries(cm_id_priv, backlog);
 	if (ret)
 		return ret;
@@ -1030,11 +1048,20 @@ static int __init iw_cm_init(void)
 	if (!iwcm_wq)
 		return -ENOMEM;
+	iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
+						 iwcm_ctl_table);
+	if (!iwcm_ctl_table_hdr) {
+		pr_err("iw_cm: couldn't register sysctl paths\n");
+		destroy_workqueue(iwcm_wq);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 static void __exit iw_cm_cleanup(void)
 {
+	unregister_net_sysctl_table(iwcm_ctl_table_hdr);
 	destroy_workqueue(iwcm_wq);
 }
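The iwcm hunk above is a compact example of the standard pattern for exposing a module tunable through sysctl. A minimal self-contained sketch of the same pattern (module and variable names are illustrative, not part of the patch):

	#include <linux/module.h>
	#include <linux/sysctl.h>
	#include <net/net_namespace.h>

	static unsigned int default_backlog = 256;
	static struct ctl_table_header *hdr;

	static struct ctl_table tbl[] = {
		{
			.procname	= "default_backlog",
			.data		= &default_backlog,
			.maxlen		= sizeof(default_backlog),
			.mode		= 0644,		/* root-writable, world-readable */
			.proc_handler	= proc_dointvec,
		},
		{ }					/* table is empty-terminated */
	};

	static int __init demo_init(void)
	{
		/* appears as /proc/sys/net/iw_cm/default_backlog */
		hdr = register_net_sysctl(&init_net, "net/iw_cm", tbl);
		return hdr ? 0 : -ENOMEM;
	}

	static void __exit demo_exit(void)
	{
		unregister_net_sysctl_table(hdr);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");

With the patch applied, "sysctl net.iw_cm.default_backlog" reads or sets the value that iw_cm_listen() substitutes when a listener asks for a backlog of zero.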
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ab31f136d04b..74c30f4c557e 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -33,6 +33,9 @@
  * SOFTWARE.
  *
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -195,7 +198,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 					   u8 rmpp_version,
 					   ib_mad_send_handler send_handler,
 					   ib_mad_recv_handler recv_handler,
-					   void *context)
+					   void *context,
+					   u32 registration_flags)
 {
 	struct ib_mad_port_private *port_priv;
 	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
@@ -211,68 +215,109 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	/* Validate parameters */
 	qpn = get_spl_qp_index(qp_type);
-	if (qpn == -1)
+	if (qpn == -1) {
+		dev_notice(&device->dev,
+			   "ib_register_mad_agent: invalid QP Type %d\n",
+			   qp_type);
 		goto error1;
+	}
-	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
+	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
+		dev_notice(&device->dev,
+			   "ib_register_mad_agent: invalid RMPP Version %u\n",
+			   rmpp_version);
 		goto error1;
+	}
 	/* Validate MAD registration request if supplied */
 	if (mad_reg_req) {
-		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
+		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
+			dev_notice(&device->dev,
+				   "ib_register_mad_agent: invalid Class Version %u\n",
+				   mad_reg_req->mgmt_class_version);
 			goto error1;
-		if (!recv_handler)
+		}
+		if (!recv_handler) {
+			dev_notice(&device->dev,
+				   "ib_register_mad_agent: no recv_handler\n");
 			goto error1;
+		}
 		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
 			/*
 			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
 			 * one in this range currently allowed
 			 */
 			if (mad_reg_req->mgmt_class !=
-			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		} else if (mad_reg_req->mgmt_class == 0) {
 			/*
 			 * Class 0 is reserved in IBA and is used for
 			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
 			 */
+			dev_notice(&device->dev,
+				   "ib_register_mad_agent: Invalid Mgmt Class 0\n");
 			goto error1;
 		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
 			/*
 			 * If class is in "new" vendor range,
 			 * ensure supplied OUI is not zero
 			 */
-			if (!is_vendor_oui(mad_reg_req->oui))
+			if (!is_vendor_oui(mad_reg_req->oui)) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: No OUI specified for class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		}
 		/* Make sure class supplied is consistent with RMPP */
 		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
-			if (rmpp_version)
+			if (rmpp_version) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		}
+
 		/* Make sure class supplied is consistent with QP type */
 		if (qp_type == IB_QPT_SMI) {
 			if ((mad_reg_req->mgmt_class !=
 					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
 			    (mad_reg_req->mgmt_class !=
-					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		} else {
 			if ((mad_reg_req->mgmt_class ==
 					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
 			    (mad_reg_req->mgmt_class ==
-					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		}
 	} else {
 		/* No registration request supplied */
 		if (!send_handler)
 			goto error1;
+		if (registration_flags & IB_MAD_USER_RMPP)
+			goto error1;
 	}
 	/* Validate device and port */
 	port_priv = ib_get_mad_port(device, port_num);
 	if (!port_priv) {
+		dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
 		ret = ERR_PTR(-ENODEV);
 		goto error1;
 	}
@@ -280,6 +325,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	/* Verify the QP requested is supported. For example, Ethernet devices
 	 * will not have QP0 */
 	if (!port_priv->qp_info[qpn].qp) {
+		dev_notice(&device->dev,
+			   "ib_register_mad_agent: QP %d not supported\n", qpn);
 		ret = ERR_PTR(-EPROTONOSUPPORT);
 		goto error1;
 	}
@@ -316,6 +363,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	mad_agent_priv->agent.context = context;
 	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
 	mad_agent_priv->agent.port_num = port_num;
+	mad_agent_priv->agent.flags = registration_flags;
 	spin_lock_init(&mad_agent_priv->lock);
 	INIT_LIST_HEAD(&mad_agent_priv->send_list);
 	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
@@ -706,7 +754,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	    smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
 	     IB_SMI_DISCARD) {
 		ret = -EINVAL;
-		printk(KERN_ERR PFX "Invalid directed route\n");
+		dev_err(&device->dev, "Invalid directed route\n");
 		goto out;
 	}
@@ -718,7 +766,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	local = kmalloc(sizeof *local, GFP_ATOMIC);
 	if (!local) {
 		ret = -ENOMEM;
-		printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
+		dev_err(&device->dev, "No memory for ib_mad_local_private\n");
 		goto out;
 	}
 	local->mad_priv = NULL;
@@ -726,7 +774,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
 	if (!mad_priv) {
 		ret = -ENOMEM;
-		printk(KERN_ERR PFX "No memory for local response MAD\n");
+		dev_err(&device->dev, "No memory for local response MAD\n");
 		kfree(local);
 		goto out;
 	}
@@ -837,9 +885,9 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
 	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
 		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
 		if (!seg) {
-			printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
-			       "alloc failed for len %zd, gfp %#x\n",
-			       sizeof (*seg) + seg_size, gfp_mask);
+			dev_err(&send_buf->mad_agent->device->dev,
+				"alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
+				sizeof (*seg) + seg_size, gfp_mask);
 			free_send_rmpp_list(send_wr);
 			return -ENOMEM;
 		}
@@ -862,6 +910,12 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
 	return 0;
 }
+int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent)
+{
+	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
+}
+EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
+
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 					    u32 remote_qpn, u16 pkey_index,
 					    int rmpp_active,
@@ -878,10 +932,12 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 	pad = get_pad_size(hdr_len, data_len);
 	message_size = hdr_len + data_len + pad;
-	if ((!mad_agent->rmpp_version &&
-	     (rmpp_active || message_size > sizeof(struct ib_mad))) ||
-	    (!rmpp_active && message_size > sizeof(struct ib_mad)))
-		return ERR_PTR(-EINVAL);
+	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
+		if (!rmpp_active && message_size > sizeof(struct ib_mad))
+			return ERR_PTR(-EINVAL);
+	} else
+		if (rmpp_active || message_size > sizeof(struct ib_mad))
+			return ERR_PTR(-EINVAL);
 	size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
 	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
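The new ib_mad_kernel_rmpp_agent() helper above gates every former rmpp_version check in the rest of this file. Its truth table is easy to verify in isolation; a runnable userspace rendition, assuming IB_MAD_USER_RMPP is the lone registration flag (its exact value lives in the uapi header, not in this diff):

	#include <stdio.h>

	#define IB_MAD_USER_RMPP (1 << 0)	/* assumed flag value for the demo */

	struct agent { unsigned int rmpp_version; unsigned int flags; };

	/* Mirrors the helper added in mad.c: the kernel owns RMPP segmentation
	 * only when the agent speaks RMPP *and* did not ask for user-space RMPP. */
	static int kernel_rmpp(const struct agent *a)
	{
		return a->rmpp_version && !(a->flags & IB_MAD_USER_RMPP);
	}

	int main(void)
	{
		struct agent cases[] = {
			{ 0, 0 },			/* no RMPP at all */
			{ 1, 0 },			/* kernel-handled RMPP */
			{ 1, IB_MAD_USER_RMPP },	/* user-space RMPP */
		};
		for (unsigned int i = 0; i < 3; i++)
			printf("rmpp_version=%u flags=%#x -> kernel_rmpp=%d\n",
			       cases[i].rmpp_version, cases[i].flags,
			       kernel_rmpp(&cases[i]));
		return 0;
	}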
- "version %d\n", mad->mad_hdr.base_version); + pr_err("MAD received with unsupported base version %d\n", + mad->mad_hdr.base_version); goto out; } @@ -1685,6 +1741,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, rmpp_mad = (struct ib_rmpp_mad *)mad_hdr; return !mad_agent_priv->agent.rmpp_version || + !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) || !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) || (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); @@ -1812,7 +1869,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); - if (mad_agent_priv->agent.rmpp_version) { + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, mad_recv_wc); if (!mad_recv_wc) { @@ -1827,23 +1884,39 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - ib_free_recv_mad(mad_recv_wc); - deref_mad_agent(mad_agent_priv); - return; - } - ib_mark_mad_done(mad_send_wr); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) + && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr) + & IB_MGMT_RMPP_FLAG_ACTIVE)) { + /* user rmpp is in effect + * and this is an active RMPP MAD + */ + mad_recv_wc->wc->wr_id = 0; + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, + mad_recv_wc); + atomic_dec(&mad_agent_priv->refcount); + } else { + /* not user rmpp, revert to normal behavior and + * drop the mad */ + ib_free_recv_mad(mad_recv_wc); + deref_mad_agent(mad_agent_priv); + return; + } + } else { + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - /* Defined behavior is to complete response before request */ - mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; - mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, - mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); + /* Defined behavior is to complete response before request */ + mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, + mad_recv_wc); + atomic_dec(&mad_agent_priv->refcount); - mad_send_wc.status = IB_WC_SUCCESS; - mad_send_wc.vendor_err = 0; - mad_send_wc.send_buf = &mad_send_wr->send_buf; - ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + } } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, mad_recv_wc); @@ -1911,8 +1984,8 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); if (!response) { - printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory " - "for response buffer\n"); + dev_err(&port_priv->device->dev, + "ib_mad_recv_done_handler no memory for response buffer\n"); goto out; } @@ -2083,7 +2156,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, mad_agent_priv = mad_send_wr->mad_agent_priv; spin_lock_irqsave(&mad_agent_priv->lock, flags); - if 
@@ -1658,8 +1714,8 @@ static int validate_mad(struct ib_mad *mad, u32 qp_num)
 	/* Make sure MAD base version is understood */
 	if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
-		printk(KERN_ERR PFX "MAD received with unsupported base "
-		       "version %d\n", mad->mad_hdr.base_version);
+		pr_err("MAD received with unsupported base version %d\n",
+		       mad->mad_hdr.base_version);
 		goto out;
 	}
@@ -1685,6 +1741,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
 	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
 	return !mad_agent_priv->agent.rmpp_version ||
+		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
 		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
@@ -1812,7 +1869,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
-	if (mad_agent_priv->agent.rmpp_version) {
+	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
 		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
 						      mad_recv_wc);
 		if (!mad_recv_wc) {
@@ -1827,23 +1884,39 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
-			ib_free_recv_mad(mad_recv_wc);
-			deref_mad_agent(mad_agent_priv);
-			return;
-		}
-		ib_mark_mad_done(mad_send_wr);
-		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
+			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
+			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
+					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
+				/* user rmpp is in effect
+				 * and this is an active RMPP MAD
+				 */
+				mad_recv_wc->wc->wr_id = 0;
+				mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+								   mad_recv_wc);
+				atomic_dec(&mad_agent_priv->refcount);
+			} else {
+				/* not user rmpp, revert to normal behavior and
+				 * drop the mad */
+				ib_free_recv_mad(mad_recv_wc);
+				deref_mad_agent(mad_agent_priv);
+				return;
+			}
+		} else {
+			ib_mark_mad_done(mad_send_wr);
+			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
-		/* Defined behavior is to complete response before request */
-		mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
-		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
-						   mad_recv_wc);
-		atomic_dec(&mad_agent_priv->refcount);
+			/* Defined behavior is to complete response before request */
+			mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
+			mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+							   mad_recv_wc);
+			atomic_dec(&mad_agent_priv->refcount);
-		mad_send_wc.status = IB_WC_SUCCESS;
-		mad_send_wc.vendor_err = 0;
-		mad_send_wc.send_buf = &mad_send_wr->send_buf;
-		ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+			mad_send_wc.status = IB_WC_SUCCESS;
+			mad_send_wc.vendor_err = 0;
+			mad_send_wc.send_buf = &mad_send_wr->send_buf;
+			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+		}
 	} else {
 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
 						   mad_recv_wc);
@@ -1911,8 +1984,8 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 	response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
 	if (!response) {
-		printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
-		       "for response buffer\n");
+		dev_err(&port_priv->device->dev,
+			"ib_mad_recv_done_handler no memory for response buffer\n");
 		goto out;
 	}
@@ -2083,7 +2156,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 	mad_agent_priv = mad_send_wr->mad_agent_priv;
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
-	if (mad_agent_priv->agent.rmpp_version) {
+	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
 		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
 		if (ret == IB_RMPP_RESULT_CONSUMED)
 			goto done;
@@ -2176,7 +2249,8 @@ retry:
 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
 				   &bad_send_wr);
 		if (ret) {
-			printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
+			dev_err(&port_priv->device->dev,
+				"ib_post_send failed: %d\n", ret);
 			mad_send_wr = queued_send_wr;
 			wc->status = IB_WC_LOC_QP_OP_ERR;
 			goto retry;
@@ -2248,8 +2322,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
 					   IB_QP_STATE | IB_QP_CUR_STATE);
 			kfree(attr);
 			if (ret)
-				printk(KERN_ERR PFX "mad_error_handler - "
-				       "ib_modify_qp to RTS : %d\n", ret);
+				dev_err(&port_priv->device->dev,
+					"mad_error_handler - ib_modify_qp to RTS : %d\n",
+					ret);
 			else
 				mark_sends_for_retry(qp_info);
 		}
@@ -2408,7 +2483,8 @@ static void local_completions(struct work_struct *work)
 			if (local->mad_priv) {
 				recv_mad_agent = local->recv_mad_agent;
 				if (!recv_mad_agent) {
-					printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
+					dev_err(&mad_agent_priv->agent.device->dev,
+						"No receive MAD agent for local completion\n");
 					free_mad = 1;
 					goto local_send_completion;
 				}
@@ -2476,7 +2552,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
-	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
+	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
 		ret = ib_retry_rmpp(mad_send_wr);
 		switch (ret) {
 		case IB_RMPP_RESULT_UNHANDLED:
@@ -2589,7 +2665,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 		} else {
 			mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
 			if (!mad_priv) {
-				printk(KERN_ERR PFX "No memory for receive buffer\n");
+				dev_err(&qp_info->port_priv->device->dev,
+					"No memory for receive buffer\n");
 				ret = -ENOMEM;
 				break;
 			}
@@ -2625,7 +2702,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 					    sizeof mad_priv->header,
 					    DMA_FROM_DEVICE);
 			kmem_cache_free(ib_mad_cache, mad_priv);
-			printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
+			dev_err(&qp_info->port_priv->device->dev,
+				"ib_post_recv failed: %d\n", ret);
 			break;
 		}
 	} while (post);
@@ -2681,7 +2759,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr) {
-		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
+		dev_err(&port_priv->device->dev,
+			"Couldn't kmalloc ib_qp_attr\n");
 		return -ENOMEM;
 	}
@@ -2705,16 +2784,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
 					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
 		if (ret) {
-			printk(KERN_ERR PFX "Couldn't change QP%d state to "
-			       "INIT: %d\n", i, ret);
+			dev_err(&port_priv->device->dev,
+				"Couldn't change QP%d state to INIT: %d\n",
+				i, ret);
 			goto out;
 		}
 		attr->qp_state = IB_QPS_RTR;
 		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
 		if (ret) {
-			printk(KERN_ERR PFX "Couldn't change QP%d state to "
-			       "RTR: %d\n", i, ret);
+			dev_err(&port_priv->device->dev,
+				"Couldn't change QP%d state to RTR: %d\n",
+				i, ret);
 			goto out;
 		}
		attr->qp_state = IB_QPS_RTS;
 		attr->sq_psn = IB_MAD_SEND_Q_PSN;
 		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
 		if (ret) {
-			printk(KERN_ERR PFX "Couldn't change QP%d state to "
-			       "RTS: %d\n", i, ret);
+			dev_err(&port_priv->device->dev,
+				"Couldn't change QP%d state to RTS: %d\n",
+				i, ret);
 			goto out;
 		}
 	}
 	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
 	if (ret) {
-		printk(KERN_ERR PFX "Failed to request completion "
-		       "notification: %d\n", ret);
+		dev_err(&port_priv->device->dev,
+			"Failed to request completion notification: %d\n",
+			ret);
 		goto out;
 	}
@@ -2741,7 +2824,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
 		if (ret) {
-			printk(KERN_ERR PFX "Couldn't post receive WRs\n");
+			dev_err(&port_priv->device->dev,
+				"Couldn't post receive WRs\n");
 			goto out;
 		}
 	}
@@ -2755,7 +2839,8 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
 	struct ib_mad_qp_info	*qp_info = qp_context;
 	/* It's worse than that! He's dead, Jim! */
-	printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
+	dev_err(&qp_info->port_priv->device->dev,
+		"Fatal error (%d) on MAD QP (%d)\n",
 		event->event, qp_info->qp->qp_num);
 }
@@ -2801,8 +2886,9 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
 	qp_init_attr.event_handler = qp_event_handler;
 	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
 	if (IS_ERR(qp_info->qp)) {
-		printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
-		       get_spl_qp_index(qp_type));
+		dev_err(&qp_info->port_priv->device->dev,
+			"Couldn't create ib_mad QP%d\n",
+			get_spl_qp_index(qp_type));
 		ret = PTR_ERR(qp_info->qp);
 		goto error;
 	}
@@ -2840,7 +2926,7 @@ static int ib_mad_port_open(struct ib_device *device,
 	/* Create new device info */
 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
 	if (!port_priv) {
-		printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
+		dev_err(&device->dev, "No memory for ib_mad_port_private\n");
 		return -ENOMEM;
 	}
@@ -2860,21 +2946,21 @@ static int ib_mad_port_open(struct ib_device *device,
 					    ib_mad_thread_completion_handler,
 					    NULL, port_priv, cq_size, 0);
 	if (IS_ERR(port_priv->cq)) {
-		printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
+		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
 		ret = PTR_ERR(port_priv->cq);
 		goto error3;
 	}
 	port_priv->pd = ib_alloc_pd(device);
 	if (IS_ERR(port_priv->pd)) {
-		printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
+		dev_err(&device->dev, "Couldn't create ib_mad PD\n");
 		ret = PTR_ERR(port_priv->pd);
 		goto error4;
 	}
 	port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(port_priv->mr)) {
-		printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
+		dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n");
 		ret = PTR_ERR(port_priv->mr);
 		goto error5;
 	}
@@ -2902,7 +2988,7 @@ static int ib_mad_port_open(struct ib_device *device,
 	ret = ib_mad_port_start(port_priv);
 	if (ret) {
-		printk(KERN_ERR PFX "Couldn't start port\n");
+		dev_err(&device->dev, "Couldn't start port\n");
 		goto error9;
 	}
@@ -2946,7 +3032,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
 	port_priv = __ib_get_mad_port(device, port_num);
 	if (port_priv == NULL) {
 		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-		printk(KERN_ERR PFX "Port %d not found\n", port_num);
+		dev_err(&device->dev, "Port %d not found\n", port_num);
 		return -ENODEV;
 	}
 	list_del_init(&port_priv->port_list);
@@ -2984,14 +3070,12 @@ static void ib_mad_init_device(struct ib_device *device)
 	for (i = start; i <= end; i++) {
 		if (ib_mad_port_open(device, i)) {
-			printk(KERN_ERR PFX "Couldn't open %s port %d\n",
-			       device->name, i);
+			dev_err(&device->dev, "Couldn't open port %d\n", i);
 			goto error;
 		}
agents\n", - device->name, i); + dev_err(&device->dev, + "Couldn't open port %d for agents\n", i); goto error_agent; } } @@ -2999,20 +3083,17 @@ static void ib_mad_init_device(struct ib_device *device) error_agent: if (ib_mad_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, i); + dev_err(&device->dev, "Couldn't close port %d\n", i); error: i--; while (i >= start) { if (ib_agent_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d " - "for agents\n", - device->name, i); + dev_err(&device->dev, + "Couldn't close port %d for agents\n", i); if (ib_mad_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, i); + dev_err(&device->dev, "Couldn't close port %d\n", i); i--; } } @@ -3033,12 +3114,12 @@ static void ib_mad_remove_device(struct ib_device *device) } for (i = 0; i < num_ports; i++, cur_port++) { if (ib_agent_port_close(device, cur_port)) - printk(KERN_ERR PFX "Couldn't close %s port %d " - "for agents\n", - device->name, cur_port); + dev_err(&device->dev, + "Couldn't close port %d for agents\n", + cur_port); if (ib_mad_port_close(device, cur_port)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, cur_port); + dev_err(&device->dev, "Couldn't close port %d\n", + cur_port); } } @@ -3064,7 +3145,7 @@ static int __init ib_mad_init_module(void) SLAB_HWCACHE_ALIGN, NULL); if (!ib_mad_cache) { - printk(KERN_ERR PFX "Couldn't create ib_mad cache\n"); + pr_err("Couldn't create ib_mad cache\n"); ret = -ENOMEM; goto error1; } @@ -3072,7 +3153,7 @@ static int __init ib_mad_init_module(void) INIT_LIST_HEAD(&ib_mad_port_list); if (ib_register_client(&mad_client)) { - printk(KERN_ERR PFX "Couldn't register ib_mad client\n"); + pr_err("Couldn't register ib_mad client\n"); ret = -EINVAL; goto error2; } diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 9430ab4969c5..d1a0b0ee9444 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -42,9 +42,6 @@ #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> - -#define PFX "ib_mad: " - #define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */ /* QP and CQ parameters */ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 233eaf541f55..c38f030f0dc9 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1184,7 +1184,7 @@ static void ib_sa_add_one(struct ib_device *device) sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, - recv_handler, sa_dev); + recv_handler, sa_dev, 0); if (IS_ERR(sa_dev->port[i].agent)) goto err; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index a3a2e9c1639b..df0c4f605a21 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -105,6 +105,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, umem->length = size; umem->offset = addr & ~PAGE_MASK; umem->page_size = PAGE_SIZE; + umem->pid = get_task_pid(current, PIDTYPE_PID); /* * We ask for writable memory if any access flags other than * "remote read" are set. 
"Local write" and "remote write" @@ -198,6 +199,7 @@ out: if (ret < 0) { if (need_release) __ib_umem_release(context->device, umem, 0); + put_pid(umem->pid); kfree(umem); } else current->mm->pinned_vm = locked; @@ -230,15 +232,19 @@ void ib_umem_release(struct ib_umem *umem) { struct ib_ucontext *context = umem->context; struct mm_struct *mm; + struct task_struct *task; unsigned long diff; __ib_umem_release(umem->context->device, umem, 1); - mm = get_task_mm(current); - if (!mm) { - kfree(umem); - return; - } + task = get_pid_task(umem->pid, PIDTYPE_PID); + put_pid(umem->pid); + if (!task) + goto out; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; @@ -262,9 +268,10 @@ void ib_umem_release(struct ib_umem *umem) } else down_write(&mm->mmap_sem); - current->mm->pinned_vm -= diff; + mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); +out: kfree(umem); } EXPORT_SYMBOL(ib_umem_release); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 1acb99100556..928cdd20e2d1 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -33,6 +33,8 @@ * SOFTWARE. */ +#define pr_fmt(fmt) "user_mad: " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/device.h> @@ -504,13 +506,15 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); - if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) { - copy_offset = IB_MGMT_MAD_HDR; - rmpp_active = 0; - } else { + + if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + && ib_mad_kernel_rmpp_agent(agent)) { copy_offset = IB_MGMT_RMPP_HDR; rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE; + IB_MGMT_RMPP_FLAG_ACTIVE; + } else { + copy_offset = IB_MGMT_MAD_HDR; + rmpp_active = 0; } data_len = count - hdr_size(file) - hdr_len; @@ -556,14 +560,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rmpp_mad->mad_hdr.tid = *tid; } - spin_lock_irq(&file->send_lock); - ret = is_duplicate(file, packet); - if (!ret) + if (!ib_mad_kernel_rmpp_agent(agent) + && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { + spin_lock_irq(&file->send_lock); list_add_tail(&packet->list, &file->send_list); - spin_unlock_irq(&file->send_lock); - if (ret) { - ret = -EINVAL; - goto err_msg; + spin_unlock_irq(&file->send_lock); + } else { + spin_lock_irq(&file->send_lock); + ret = is_duplicate(file, packet); + if (!ret) + list_add_tail(&packet->list, &file->send_list); + spin_unlock_irq(&file->send_lock); + if (ret) { + ret = -EINVAL; + goto err_msg; + } } ret = ib_post_send_mad(packet->msg, NULL); @@ -614,6 +626,8 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, mutex_lock(&file->mutex); if (!file->port->ib_dev) { + dev_notice(file->port->dev, + "ib_umad_reg_agent: invalid device\n"); ret = -EPIPE; goto out; } @@ -624,6 +638,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, } if (ureq.qpn != 0 && ureq.qpn != 1) { + dev_notice(file->port->dev, + "ib_umad_reg_agent: invalid QPN %d specified\n", + ureq.qpn); ret = -EINVAL; goto out; } @@ -632,11 +649,15 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, if (!__get_agent(file, agent_id)) goto found; + 
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1acb99100556..928cdd20e2d1 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -33,6 +33,8 @@
  * SOFTWARE.
  */
+#define pr_fmt(fmt) "user_mad: " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/device.h>
@@ -504,13 +506,15 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 	rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
 	hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
-	if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) {
-		copy_offset = IB_MGMT_MAD_HDR;
-		rmpp_active = 0;
-	} else {
+
+	if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+	    && ib_mad_kernel_rmpp_agent(agent)) {
 		copy_offset = IB_MGMT_RMPP_HDR;
 		rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
-			      IB_MGMT_RMPP_FLAG_ACTIVE;
+			IB_MGMT_RMPP_FLAG_ACTIVE;
+	} else {
+		copy_offset = IB_MGMT_MAD_HDR;
+		rmpp_active = 0;
 	}
 	data_len = count - hdr_size(file) - hdr_len;
@@ -556,14 +560,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 		rmpp_mad->mad_hdr.tid = *tid;
 	}
-	spin_lock_irq(&file->send_lock);
-	ret = is_duplicate(file, packet);
-	if (!ret)
+	if (!ib_mad_kernel_rmpp_agent(agent)
+	   && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+	   && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+		spin_lock_irq(&file->send_lock);
 		list_add_tail(&packet->list, &file->send_list);
-	spin_unlock_irq(&file->send_lock);
-	if (ret) {
-		ret = -EINVAL;
-		goto err_msg;
+		spin_unlock_irq(&file->send_lock);
+	} else {
+		spin_lock_irq(&file->send_lock);
+		ret = is_duplicate(file, packet);
+		if (!ret)
+			list_add_tail(&packet->list, &file->send_list);
+		spin_unlock_irq(&file->send_lock);
+		if (ret) {
+			ret = -EINVAL;
+			goto err_msg;
+		}
 	}
 	ret = ib_post_send_mad(packet->msg, NULL);
@@ -614,6 +626,8 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
 	mutex_lock(&file->mutex);
 	if (!file->port->ib_dev) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent: invalid device\n");
 		ret = -EPIPE;
 		goto out;
 	}
@@ -624,6 +638,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
 	}
 	if (ureq.qpn != 0 && ureq.qpn != 1) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent: invalid QPN %d specified\n",
+			   ureq.qpn);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -632,11 +649,15 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
 		if (!__get_agent(file, agent_id))
 			goto found;
+	dev_notice(file->port->dev,
+		   "ib_umad_reg_agent: Max Agents (%u) reached\n",
+		   IB_UMAD_MAX_AGENTS);
 	ret = -ENOMEM;
 	goto out;
 found:
 	if (ureq.mgmt_class) {
+		memset(&req, 0, sizeof(req));
 		req.mgmt_class         = ureq.mgmt_class;
 		req.mgmt_class_version = ureq.mgmt_class_version;
 		memcpy(req.oui, ureq.oui, sizeof req.oui);
@@ -657,7 +678,7 @@ found:
 				      ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
 				      ureq.mgmt_class ? &req : NULL,
 				      ureq.rmpp_version,
-				      send_handler, recv_handler, file);
+				      send_handler, recv_handler, file, 0);
 	if (IS_ERR(agent)) {
 		ret = PTR_ERR(agent);
 		agent = NULL;
@@ -673,10 +694,11 @@ found:
 	if (!file->already_used) {
 		file->already_used = 1;
 		if (!file->use_pkey_index) {
-			printk(KERN_WARNING "user_mad: process %s did not enable "
-			       "P_Key index support.\n", current->comm);
-			printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
-			       "has info on the new ABI.\n");
+			dev_warn(file->port->dev,
+				"process %s did not enable P_Key index support.\n",
+				current->comm);
+			dev_warn(file->port->dev,
+				"	Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
 		}
 	}
@@ -694,6 +716,119 @@ out:
 	return ret;
 }
+static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
+{
+	struct ib_user_mad_reg_req2 ureq;
+	struct ib_mad_reg_req req;
+	struct ib_mad_agent *agent = NULL;
+	int agent_id;
+	int ret;
+
+	mutex_lock(&file->port->file_mutex);
+	mutex_lock(&file->mutex);
+
+	if (!file->port->ib_dev) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent2: invalid device\n");
+		ret = -EPIPE;
+		goto out;
+	}
+
+	if (copy_from_user(&ureq, arg, sizeof(ureq))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (ureq.qpn != 0 && ureq.qpn != 1) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent2: invalid QPN %d specified\n",
+			   ureq.qpn);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
+			   ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
+		ret = -EINVAL;
+
+		if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
+				(u32 __user *) (arg + offsetof(struct
+				ib_user_mad_reg_req2, flags))))
+			ret = -EFAULT;
+
+		goto out;
+	}
+
+	for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
+		if (!__get_agent(file, agent_id))
+			goto found;
+
+	dev_notice(file->port->dev,
+		   "ib_umad_reg_agent2: Max Agents (%u) reached\n",
+		   IB_UMAD_MAX_AGENTS);
+	ret = -ENOMEM;
+	goto out;
+
+found:
+	if (ureq.mgmt_class) {
+		memset(&req, 0, sizeof(req));
+		req.mgmt_class         = ureq.mgmt_class;
+		req.mgmt_class_version = ureq.mgmt_class_version;
+		if (ureq.oui & 0xff000000) {
+			dev_notice(file->port->dev,
+				   "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
+				   ureq.oui);
+			ret = -EINVAL;
+			goto out;
+		}
+		req.oui[2] =  ureq.oui & 0x0000ff;
+		req.oui[1] = (ureq.oui & 0x00ff00) >> 8;
+		req.oui[0] = (ureq.oui & 0xff0000) >> 16;
+		memcpy(req.method_mask, ureq.method_mask,
+			sizeof(req.method_mask));
+	}
+
+	agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
+				      ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
+				      ureq.mgmt_class ? &req : NULL,
+				      ureq.rmpp_version,
+				      send_handler, recv_handler, file,
+				      ureq.flags);
+	if (IS_ERR(agent)) {
+		ret = PTR_ERR(agent);
+		agent = NULL;
+		goto out;
+	}
+
+	if (put_user(agent_id,
+		     (u32 __user *)(arg +
+				offsetof(struct ib_user_mad_reg_req2, id)))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (!file->already_used) {
+		file->already_used = 1;
+		file->use_pkey_index = 1;
+	}
+
+	file->agent[agent_id] = agent;
+	ret = 0;
+
+out:
+	mutex_unlock(&file->mutex);
+
+	if (ret && agent)
+		ib_unregister_mad_agent(agent);
+
+	mutex_unlock(&file->port->file_mutex);
+
+	return ret;
+}
+
+
 static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
 {
 	struct ib_mad_agent *agent = NULL;
@@ -749,6 +884,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
 		return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
 	case IB_USER_MAD_ENABLE_PKEY:
 		return ib_umad_enable_pkey(filp->private_data);
+	case IB_USER_MAD_REGISTER_AGENT2:
+		return ib_umad_reg_agent2(filp->private_data, (void __user *) arg);
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -765,6 +902,8 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
 		return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
 	case IB_USER_MAD_ENABLE_PKEY:
 		return ib_umad_enable_pkey(filp->private_data);
+	case IB_USER_MAD_REGISTER_AGENT2:
+		return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg));
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -983,7 +1122,7 @@ static CLASS_ATTR_STRING(abi_version, S_IRUGO,
 static dev_t overflow_maj;
 static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
-static int find_overflow_devnum(void)
+static int find_overflow_devnum(struct ib_device *device)
 {
 	int ret;
@@ -991,7 +1130,8 @@ static int find_overflow_devnum(void)
 		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
 					  "infiniband_mad");
 		if (ret) {
-			printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+			dev_err(&device->dev,
+				"couldn't register dynamic device number\n");
 			return ret;
 		}
 	}
@@ -1014,7 +1154,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
 	devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
 	if (devnum >= IB_UMAD_MAX_PORTS) {
 		spin_unlock(&port_lock);
-		devnum = find_overflow_devnum();
+		devnum = find_overflow_devnum(device);
 		if (devnum < 0)
 			return -1;
@@ -1200,14 +1340,14 @@ static int __init ib_umad_init(void)
 	ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
 				     "infiniband_mad");
 	if (ret) {
-		printk(KERN_ERR "user_mad: couldn't register device number\n");
+		pr_err("couldn't register device number\n");
 		goto out;
 	}
 	umad_class = class_create(THIS_MODULE, "infiniband_mad");
 	if (IS_ERR(umad_class)) {
 		ret = PTR_ERR(umad_class);
-		printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
+		pr_err("couldn't create class infiniband_mad\n");
 		goto out_chrdev;
 	}
@@ -1215,13 +1355,13 @@ static int __init ib_umad_init(void)
 	ret = class_create_file(umad_class, &class_attr_abi_version.attr);
 	if (ret) {
-		printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
+		pr_err("couldn't create abi_version attribute\n");
 		goto out_class;
 	}
 	ret = ib_register_client(&umad_client);
 	if (ret) {
-		printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
+		pr_err("couldn't register ib_umad client\n");
 		goto out_class;
 	}
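ib_umad_reg_agent2() accepts the OUI as a single u32 but ib_mad_reg_req stores it as three bytes, so the hunk first checks that the top byte is clear and then splits the value big-endian-style into req.oui[]. The byte gymnastics can be checked standalone:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t oui = 0x001405;	/* example 24-bit OUI */
		uint8_t b[3];

		if (oui & 0xff000000)		/* same validity check as the patch */
			return 1;

		b[2] =  oui & 0x0000ff;		/* same packing as req.oui[] */
		b[1] = (oui & 0x00ff00) >> 8;
		b[0] = (oui & 0xff0000) >> 16;

		printf("oui[0..2] = %02x %02x %02x\n", b[0], b[1], b[2]);
		return 0;	/* prints: oui[0..2] = 00 14 05 */
	}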
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a283274a5a09..643c08a025a5 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -221,6 +221,7 @@ IB_UVERBS_DECLARE_CMD(query_port);
 IB_UVERBS_DECLARE_CMD(alloc_pd);
 IB_UVERBS_DECLARE_CMD(dealloc_pd);
 IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(rereg_mr);
 IB_UVERBS_DECLARE_CMD(dereg_mr);
 IB_UVERBS_DECLARE_CMD(alloc_mw);
 IB_UVERBS_DECLARE_CMD(dealloc_mw);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ea6203ee7bcc..5ba2a86aab6a 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1002,6 +1002,99 @@ err_free:
 	return ret;
 }
+ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
+			   const char __user *buf, int in_len,
+			   int out_len)
+{
+	struct ib_uverbs_rereg_mr      cmd;
+	struct ib_uverbs_rereg_mr_resp resp;
+	struct ib_udata                udata;
+	struct ib_pd                  *pd = NULL;
+	struct ib_mr                  *mr;
+	struct ib_pd                  *old_pd;
+	int                            ret;
+	struct ib_uobject             *uobj;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof(cmd)))
+		return -EFAULT;
+
+	INIT_UDATA(&udata, buf + sizeof(cmd),
+		   (unsigned long) cmd.response + sizeof(resp),
+		   in_len - sizeof(cmd), out_len - sizeof(resp));
+
+	if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+		return -EINVAL;
+
+	if ((cmd.flags & IB_MR_REREG_TRANS) &&
+	    (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
+	     (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
+			return -EINVAL;
+
+	uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
+			      file->ucontext);
+
+	if (!uobj)
+		return -EINVAL;
+
+	mr = uobj->object;
+
+	if (cmd.flags & IB_MR_REREG_ACCESS) {
+		ret = ib_check_mr_access(cmd.access_flags);
+		if (ret)
+			goto put_uobjs;
+	}
+
+	if (cmd.flags & IB_MR_REREG_PD) {
+		pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+		if (!pd) {
+			ret = -EINVAL;
+			goto put_uobjs;
+		}
+	}
+
+	if (atomic_read(&mr->usecnt)) {
+		ret = -EBUSY;
+		goto put_uobj_pd;
+	}
+
+	old_pd = mr->pd;
+	ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
+					cmd.length, cmd.hca_va,
+					cmd.access_flags, pd, &udata);
+	if (!ret) {
+		if (cmd.flags & IB_MR_REREG_PD) {
+			atomic_inc(&pd->usecnt);
+			mr->pd = pd;
+			atomic_dec(&old_pd->usecnt);
+		}
+	} else {
+		goto put_uobj_pd;
+	}
+
+	memset(&resp, 0, sizeof(resp));
+	resp.lkey      = mr->lkey;
+	resp.rkey      = mr->rkey;
+
+	if (copy_to_user((void __user *)(unsigned long)cmd.response,
+			 &resp, sizeof(resp)))
+		ret = -EFAULT;
+	else
+		ret = in_len;
+
+put_uobj_pd:
+	if (cmd.flags & IB_MR_REREG_PD)
+		put_pd_read(pd);
+
+put_uobjs:
+
+	put_uobj_write(mr->uobject);
+
+	return ret;
+}
+
 ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 			   const char __user *buf, int in_len,
 			   int out_len)
@@ -2425,6 +2518,8 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
 	attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
 	attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+	attr.vlan_id           = 0;
+	memset(&attr.dmac, 0, sizeof(attr.dmac));
 	memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
 	ah = ib_create_ah(pd, &attr);
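ib_uverbs_rereg_mr() opens with the usual uverbs flag hygiene: reject any bit outside the supported mask, and reject an empty mask too, so future flag additions fail loudly on older kernels. The check reduces to two expressions worth seeing side by side (the mask value below is a stand-in for IB_MR_REREG_SUPPORTED, assumed here to cover three flag bits):

	#include <stdio.h>

	#define SUPPORTED 0x7	/* stand-in for IB_MR_REREG_SUPPORTED */

	/* Mirrors: if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) -EINVAL */
	static int flags_ok(unsigned int flags)
	{
		return !(flags & ~SUPPORTED) && flags;
	}

	int main(void)
	{
		printf("%d %d %d %d\n",
		       flags_ok(0x1),	/* 1: valid single flag   */
		       flags_ok(0x7),	/* 1: all supported flags */
		       flags_ok(0x8),	/* 0: unknown bit         */
		       flags_ok(0x0));	/* 0: no operation asked  */
		return 0;
	}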
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 08219fb3338b..71ab83fde472 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -87,6 +87,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
 	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
 	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
+	[IB_USER_VERBS_CMD_REREG_MR]		= ib_uverbs_rereg_mr,
 	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
 	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
 	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
@@ -476,6 +477,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
 	entry->desc.async.element    = element;
 	entry->desc.async.event_type = event;
+	entry->desc.async.reserved   = 0;
 	entry->counter               = counter;
 	list_add_tail(&entry->list, &file->async_file->event_list);
@@ -501,6 +503,10 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
 {
 	struct ib_uevent_object *uobj;
+	/* for XRC target qp's, check that qp is live */
+	if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+		return;
+
 	uobj = container_of(event->element.qp->uobject,
 			    struct ib_uevent_object, uobject);
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index e7bee46868d1..abd97247443e 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -140,5 +140,9 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
 	dst->packet_life_time = src->packet_life_time;
 	dst->preference = src->preference;
 	dst->packet_life_time_selector = src->packet_life_time_selector;
+
+	memset(dst->smac, 0, sizeof(dst->smac));
+	memset(dst->dmac, 0, sizeof(dst->dmac));
+	dst->vlan_id = 0xffff;
 }
 EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 00400c352c1a..766a71ccefed 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -604,16 +604,14 @@ static int c2_up(struct net_device *netdev)
 	tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
 	c2_port->mem_size = tx_size + rx_size;
-	c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
-					    &c2_port->dma);
+	c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
+					     &c2_port->dma);
 	if (c2_port->mem == NULL) {
 		pr_debug("Unable to allocate memory for "
 			"host descriptor rings\n");
 		return -ENOMEM;
 	}
-	memset(c2_port->mem, 0, c2_port->mem_size);
-
 	/* Create the Rx host descriptor ring */
 	if ((ret =
 	     c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index 49e0e8533f74..1b63185b4ad4 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -260,11 +260,14 @@ static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
 			  mq->msg_pool.host, dma_unmap_addr(mq, mapping));
 }
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
-			   int msg_size)
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
+			   size_t q_size, size_t msg_size)
 {
 	u8 *pool_start;
+	if (q_size > SIZE_MAX / msg_size)
+		return -EINVAL;
+
 	pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
 					&mq->host_dma, GFP_KERNEL);
 	if (!pool_start)
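The c2_cq.c change is a textbook integer-overflow guard: before computing q_size * msg_size for dma_alloc_coherent(), widen the parameters to size_t and refuse any q_size that would make the product wrap. The idiom, runnable in isolation (the demo adds a zero-divisor check the kernel caller never needs):

	#include <stdio.h>
	#include <stdint.h>
	#include <stddef.h>

	/* Mirrors c2_alloc_cq_buf(): reject q_size * msg_size overflow up front. */
	static int alloc_would_overflow(size_t q_size, size_t msg_size)
	{
		return msg_size != 0 && q_size > SIZE_MAX / msg_size;
	}

	int main(void)
	{
		printf("%d\n", alloc_would_overflow(1024, 64));		/* 0: fine  */
		printf("%d\n", alloc_would_overflow(SIZE_MAX / 2, 4));	/* 1: wraps */
		return 0;
	}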
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index c2fb71c182a8..fb61f6685809 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -236,10 +236,12 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
 static void set_emss(struct c4iw_ep *ep, u16 opt)
 {
 	ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] -
-		   sizeof(struct iphdr) - sizeof(struct tcphdr);
+		   ((AF_INET == ep->com.remote_addr.ss_family) ?
+		   sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
+		   sizeof(struct tcphdr);
 	ep->mss = ep->emss;
 	if (GET_TCPOPT_TSTAMP(opt))
-		ep->emss -= 12;
+		ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
 	if (ep->emss < 128)
 		ep->emss = 128;
 	if (ep->emss & 7)
@@ -415,6 +417,7 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
 		return NULL;
 	if (!our_interface(dev, n->dev) &&
 	    !(n->dev->flags & IFF_LOOPBACK)) {
+		neigh_release(n);
 		dst_release(&rt->dst);
 		return NULL;
 	}
@@ -581,11 +584,14 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep,
 }
 static void best_mtu(const unsigned short *mtus, unsigned short mtu,
-		     unsigned int *idx, int use_ts)
+		     unsigned int *idx, int use_ts, int ipv6)
 {
-	unsigned short hdr_size = sizeof(struct iphdr) +
+	unsigned short hdr_size = (ipv6 ?
+				   sizeof(struct ipv6hdr) :
+				   sizeof(struct iphdr)) +
 				  sizeof(struct tcphdr) +
-				  (use_ts ? 12 : 0);
+				  (use_ts ?
+				   round_up(TCPOLEN_TIMESTAMP, 4) : 0);
 	unsigned short data_size = mtu - hdr_size;
 	cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
@@ -634,7 +640,8 @@ static int send_connect(struct c4iw_ep *ep)
 	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
 	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
-		 enable_tcp_timestamps);
+		 enable_tcp_timestamps,
+		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
 	wscale = compute_wscale(rcv_win);
 	/*
@@ -668,6 +675,7 @@ static int send_connect(struct c4iw_ep *ep)
 		if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 			opt2 |= T5_OPT_2_VALID;
 			opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+			opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
 		}
 		t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
@@ -713,8 +721,6 @@ static int send_connect(struct c4iw_ep *ep)
 		} else {
 			u32 isn = (prandom_u32() & ~7UL) - 1;
-			opt2 |= T5_OPT_2_VALID;
-			opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
 			if (peer2peer)
 				isn += 4;
@@ -756,10 +762,10 @@ static int send_connect(struct c4iw_ep *ep)
 			t5_req6->peer_ip_lo = *((__be64 *)
 						(ra6->sin6_addr.s6_addr + 8));
 			t5_req6->opt0 = cpu_to_be64(opt0);
-			t5_req6->params = (__force __be64)cpu_to_be32(
+			t5_req6->params = cpu_to_be64(V_FILTER_TUPLE(
 						cxgb4_select_ntuple(
 					ep->com.dev->rdev.lldi.ports[0],
-					ep->l2t));
+					ep->l2t)));
 			t5_req6->rsvd = cpu_to_be32(isn);
 			PDBG("%s snd_isn %u\n", __func__,
 			     be32_to_cpu(t5_req6->rsvd));
@@ -1763,7 +1769,8 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
 	req->tcb.tx_max = (__force __be32) jiffies;
 	req->tcb.rcv_adv = htons(1);
 	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
-		 enable_tcp_timestamps);
+		 enable_tcp_timestamps,
+		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
 	wscale = compute_wscale(rcv_win);
 	/*
@@ -2162,7 +2169,8 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
 		      ep->hwtid));
 	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
-		 enable_tcp_timestamps && req->tcpopt.tstamp);
+		 enable_tcp_timestamps && req->tcpopt.tstamp,
+		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
 	wscale = compute_wscale(rcv_win);
 	/*
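The cxgb4 changes above replace the bare "12" and the hard-coded IPv4 header size with self-describing arithmetic. With 20-byte IPv4 / 40-byte IPv6 headers and the 10-byte TCP timestamp option padded to a 4-byte boundary (standard values, assumed here rather than pulled from kernel headers), the effective MSS works out as:

	#include <stdio.h>

	#define TCPOLEN_TIMESTAMP 10
	#define round_up(x, y) ((((x) + (y) - 1) / (y)) * (y))

	static int emss(int mtu, int ipv6, int tstamp)
	{
		int hdr = (ipv6 ? 40 : 20) + 20;	/* IP header + TCP header */
		int e = mtu - hdr;
		if (tstamp)
			e -= round_up(TCPOLEN_TIMESTAMP, 4);	/* 10 -> 12 bytes */
		return e < 128 ? 128 : e;	/* same floor as set_emss() */
	}

	int main(void)
	{
		printf("v4 no-ts: %d\n", emss(1500, 0, 0));	/* 1460 */
		printf("v4 ts:    %d\n", emss(1500, 0, 1));	/* 1448 */
		printf("v6 ts:    %d\n", emss(1500, 1, 1));	/* 1428 */
		return 0;
	}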
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index f25df5276c22..72f1f052e88c 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -60,7 +60,7 @@ int c4iw_wr_log = 0;
 module_param(c4iw_wr_log, int, 0444);
 MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data.");
-int c4iw_wr_log_size_order = 12;
+static int c4iw_wr_log_size_order = 12;
 module_param(c4iw_wr_log_size_order, int, 0444);
 MODULE_PARM_DESC(c4iw_wr_log_size_order,
 		 "Number of entries (log2) in the work request timing log.");
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index fbe6051af254..c9df0549f51d 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -227,6 +227,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
 	chp = get_chp(dev, qid);
 	if (chp) {
+		t4_clear_cq_armed(&chp->cq);
 		spin_lock_irqsave(&chp->comp_handler_lock, flag);
 		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
 		spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index c158fcc02bca..41cd6882b648 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1105,7 +1105,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 		       struct c4iw_cq *schp)
 {
 	int count;
-	int flushed;
+	int rq_flushed, sq_flushed;
 	unsigned long flag;
 	PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
@@ -1123,27 +1123,40 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 	c4iw_flush_hw_cq(rchp);
 	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
-	flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+	rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&rchp->lock, flag);
-	if (flushed) {
-		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
-		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
-		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
-	}
 	/* locking hierarchy: cq lock first, then qp lock. */
 	spin_lock_irqsave(&schp->lock, flag);
 	spin_lock(&qhp->lock);
 	if (schp != rchp)
 		c4iw_flush_hw_cq(schp);
-	flushed = c4iw_flush_sq(qhp);
+	sq_flushed = c4iw_flush_sq(qhp);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&schp->lock, flag);
-	if (flushed) {
-		spin_lock_irqsave(&schp->comp_handler_lock, flag);
-		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
-		spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+
+	if (schp == rchp) {
+		if (t4_clear_cq_armed(&rchp->cq) &&
+		    (rq_flushed || sq_flushed)) {
+			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
+						   rchp->ibcq.cq_context);
+			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+		}
+	} else {
+		if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
+			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
+						   rchp->ibcq.cq_context);
+			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+		}
+		if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
+			spin_lock_irqsave(&schp->comp_handler_lock, flag);
+			(*schp->ibcq.comp_handler)(&schp->ibcq,
+						   schp->ibcq.cq_context);
+			spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+		}
 	}
 }
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index df5edfa31a8f..c04e5134b30c 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -524,6 +524,10 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq)
 	return !wq->rq.queue[wq->rq.size].status.db_off;
 }
+enum t4_cq_flags {
+	CQ_ARMED	= 1,
+};
+
 struct t4_cq {
 	struct t4_cqe *queue;
 	dma_addr_t dma_addr;
@@ -544,12 +548,19 @@ struct t4_cq {
 	u16 cidx_inc;
 	u8 gen;
 	u8 error;
+	unsigned long flags;
 };
+static inline int t4_clear_cq_armed(struct t4_cq *cq)
+{
+	return test_and_clear_bit(CQ_ARMED, &cq->flags);
+}
+
 static inline int t4_arm_cq(struct t4_cq *cq, int se)
 {
 	u32 val;
+	set_bit(CQ_ARMED, &cq->flags);
 	while (cq->cidx_inc > CIDXINC_MASK) {
 		val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) |
 		      INGRESSQID(cq->cqid);
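The t4.h/qp.c hunks close a missed-notification race by tracking CQ arming in a flag bit: t4_arm_cq() sets it, and each flush or event path invokes the completion handler only if test_and_clear_bit() shows the CQ was actually armed, so one arming produces at most one upcall. A userspace analog of the handshake using C11 atomics (the kernel uses set_bit/test_and_clear_bit; the atomics here are stand-ins):

	#include <stdio.h>
	#include <stdatomic.h>

	static atomic_uint cq_flags;
	#define CQ_ARMED 1u

	static void arm_cq(void)		/* t4_arm_cq() side */
	{
		atomic_fetch_or(&cq_flags, CQ_ARMED);
	}

	static int clear_cq_armed(void)		/* t4_clear_cq_armed() side */
	{
		return atomic_fetch_and(&cq_flags, ~CQ_ARMED) & CQ_ARMED;
	}

	int main(void)
	{
		arm_cq();
		printf("%d\n", clear_cq_armed());	/* 1: was armed, fire handler */
		printf("%d\n", clear_cq_armed());	/* 0: already consumed, skip  */
		return 0;
	}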
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index e0c404bdc4a8..4977082e081f 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -82,7 +82,6 @@ static int create_file(const char *name, umode_t mode,
 {
 	int error;
-	*dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
 	*dentry = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(*dentry))
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 43f2d0424d4f..e890e5ba0e01 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -726,7 +726,7 @@ bail:
 * @dd: the infinipath device
 * @pkeys: the PKEY table
 */
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
 {
 	struct ipath_portdata *pd;
 	int i;
@@ -759,6 +759,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
 	}
 	if (changed) {
 		u64 pkey;
+		struct ib_event event;
 		pkey = (u64) dd->ipath_pkeys[0] |
 			((u64) dd->ipath_pkeys[1] << 16) |
@@ -768,12 +769,17 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
 			(unsigned long long) pkey);
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
 				 pkey);
+
+		event.event = IB_EVENT_PKEY_CHANGE;
+		event.device = &dd->verbs_dev->ibdev;
+		event.element.port_num = port;
+		ib_dispatch_event(&event);
 	}
 	return 0;
 }
 static int recv_subn_set_pkeytable(struct ib_smp *smp,
-				   struct ib_device *ibdev)
+				   struct ib_device *ibdev, u8 port)
 {
 	u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
 	__be16 *p = (__be16 *) smp->data;
@@ -784,7 +790,7 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
 	for (i = 0; i < n; i++)
 		q[i] = be16_to_cpu(p[i]);
-	if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
+	if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
 		smp->status |= IB_SMP_INVALID_FIELD;
 	return recv_subn_get_pkeytable(smp, ibdev);
@@ -1342,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 		ret = recv_subn_set_portinfo(smp, ibdev, port_num);
 		goto bail;
 	case IB_SMP_ATTR_PKEY_TABLE:
-		ret = recv_subn_set_pkeytable(smp, ibdev);
+		ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
 		goto bail;
 	case IB_SMP_ATTR_SM_INFO:
 		if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
IB_QPT_GSI : IB_QPT_SMI, NULL, 0, send_handler, - NULL, NULL); + NULL, NULL, 0); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto err; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0f7027e7db13..8b72cf392b34 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -59,6 +59,7 @@ #define MLX4_IB_FLOW_MAX_PRIO 0xFFF #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF +#define MLX4_IB_CARD_REV_A0 0xA0 MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); @@ -119,6 +120,17 @@ static int check_flow_steering_support(struct mlx4_dev *dev) return dmfs; } +static int num_ib_ports(struct mlx4_dev *dev) +{ + int ib_ports = 0; + int i; + + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + ib_ports++; + + return ib_ports; +} + static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -126,6 +138,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; + int have_ib_ports; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); @@ -142,6 +155,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, memset(props, 0, sizeof *props); + have_ib_ports = num_ib_ports(dev->dev); + props->fw_ver = dev->dev->caps.fw_ver; props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | @@ -152,13 +167,15 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; - if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM) + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; - if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH) + if (dev->dev->caps.max_gso_sz && + (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) && + (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)) props->device_cap_flags |= IB_DEVICE_UD_TSO; if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; @@ -357,7 +374,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; - spin_lock(&iboe->lock); + spin_lock_bh(&iboe->lock); ndev = iboe->netdevs[port - 1]; if (!ndev) goto out_unlock; @@ -369,7 +386,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, IB_PORT_ACTIVE : IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); out_unlock: - spin_unlock(&iboe->lock); + spin_unlock_bh(&iboe->lock); out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; @@ -811,11 +828,11 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, if (!mqp->port) return 0; - spin_lock(&mdev->iboe.lock); + spin_lock_bh(&mdev->iboe.lock); ndev = mdev->iboe.netdevs[mqp->port - 1]; if (ndev) dev_hold(ndev); - spin_unlock(&mdev->iboe.lock); + spin_unlock_bh(&mdev->iboe.lock); if (ndev) { ret = 1; @@ -910,8 +927,7 @@ static int __mlx4_ib_default_rules_match(struct ib_qp *qp, const struct default_rules *pdefault_rules = 
default_table; u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port); - for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++, - pdefault_rules++) { + for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) { __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS]; memset(&field_types, 0, sizeof(field_types)); @@ -965,8 +981,7 @@ static int __mlx4_ib_create_default_rules( int size = 0; int i; - for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/ - sizeof(pdefault_rules->rules_create_list[0]); i++) { + for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) { int ret; union ib_flow_spec ib_spec; switch (pdefault_rules->rules_create_list[i]) { @@ -1091,6 +1106,30 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) return err; } +static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr, + u64 *reg_id) +{ + void *ib_flow; + union ib_flow_spec *ib_spec; + struct mlx4_dev *dev = to_mdev(qp->device)->dev; + int err = 0; + + if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + return 0; /* do nothing */ + + ib_flow = flow_attr + 1; + ib_spec = (union ib_flow_spec *)ib_flow; + + if (ib_spec->type != IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1) + return 0; /* do nothing */ + + err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac, + flow_attr->port, qp->qp_num, + MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff), + reg_id); + return err; +} + static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) @@ -1134,12 +1173,24 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], &mflow->reg_id[i]); if (err) - goto err_free; + goto err_create_flow; + i++; + } + + if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) { + err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]); + if (err) + goto err_create_flow; i++; } return &mflow->ibflow; +err_create_flow: + while (i) { + (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]); + i--; + } err_free: kfree(mflow); return ERR_PTR(err); @@ -1264,11 +1315,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mutex_lock(&mqp->mutex); ge = find_gid_entry(mqp, gid->raw); if (ge) { - spin_lock(&mdev->iboe.lock); + spin_lock_bh(&mdev->iboe.lock); ndev = ge->added ? 
mdev->iboe.netdevs[ge->port - 1] : NULL; if (ndev) dev_hold(ndev); - spin_unlock(&mdev->iboe.lock); + spin_unlock_bh(&mdev->iboe.lock); if (ndev) dev_put(ndev); list_del(&ge->list); @@ -1389,6 +1440,9 @@ static void update_gids_task(struct work_struct *work) int err; struct mlx4_dev *dev = gw->dev->dev; + if (!gw->dev->ib_active) + return; + mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox)); @@ -1419,6 +1473,9 @@ static void reset_gids_task(struct work_struct *work) int err; struct mlx4_dev *dev = gw->dev->dev; + if (!gw->dev->ib_active) + return; + mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { pr_warn("reset gid table failed\n"); @@ -1553,7 +1610,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, return 0; iboe = &ibdev->iboe; - spin_lock(&iboe->lock); + spin_lock_bh(&iboe->lock); for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) if ((netif_is_bond_master(real_dev) && @@ -1563,7 +1620,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, update_gid_table(ibdev, port, gid, event == NETDEV_DOWN, 0); - spin_unlock(&iboe->lock); + spin_unlock_bh(&iboe->lock); return 0; } @@ -1636,13 +1693,21 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, new_smac = mlx4_mac_to_u64(dev->dev_addr); read_unlock(&dev_base_lock); + atomic64_set(&ibdev->iboe.mac[port - 1], new_smac); + + /* no need for update QP1 and mac registration in non-SRIOV */ + if (!mlx4_is_mfunc(ibdev->dev)) + return; + mutex_lock(&ibdev->qp1_proxy_lock[port - 1]); qp = ibdev->qp1_proxy[port - 1]; if (qp) { int new_smac_index; - u64 old_smac = qp->pri.smac; + u64 old_smac; struct mlx4_update_qp_params update_params; + mutex_lock(&qp->mutex); + old_smac = qp->pri.smac; if (new_smac == old_smac) goto unlock; @@ -1652,22 +1717,25 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, goto unlock; update_params.smac_index = new_smac_index; - if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC, + if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC, &update_params)) { release_mac = new_smac; goto unlock; } - + /* if old port was zero, no mac was yet registered for this QP */ + if (qp->pri.smac_port) + release_mac = old_smac; qp->pri.smac = new_smac; + qp->pri.smac_port = port; qp->pri.smac_index = new_smac_index; - - release_mac = old_smac; } unlock: - mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]); if (release_mac != MLX4_IB_INVALID_MAC) mlx4_unregister_mac(ibdev->dev, port, release_mac); + if (qp) + mutex_unlock(&qp->mutex); + mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]); } static void mlx4_ib_get_dev_addr(struct net_device *dev, @@ -1678,6 +1746,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, struct inet6_dev *in6_dev; union ib_gid *pgid; struct inet6_ifaddr *ifp; + union ib_gid default_gid; #endif union ib_gid gid; @@ -1698,12 +1767,15 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, in_dev_put(in_dev); } #if IS_ENABLED(CONFIG_IPV6) + mlx4_make_default_gid(dev, &default_gid); /* IPv6 gids */ in6_dev = in6_dev_get(dev); if (in6_dev) { read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { pgid = (union ib_gid *)&ifp->addr; + if (!memcmp(pgid, &default_gid, sizeof(*pgid))) + continue; update_gid_table(ibdev, port, pgid, 0, 0); } read_unlock_bh(&in6_dev->lock); @@ -1725,24 +1797,33 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) struct net_device *dev; struct mlx4_ib_iboe *iboe = 
&ibdev->iboe; int i; + int err = 0; - for (i = 1; i <= ibdev->num_ports; ++i) - if (reset_gid_table(ibdev, i)) - return -1; + for (i = 1; i <= ibdev->num_ports; ++i) { + if (rdma_port_get_link_layer(&ibdev->ib_dev, i) == + IB_LINK_LAYER_ETHERNET) { + err = reset_gid_table(ibdev, i); + if (err) + goto out; + } + } read_lock(&dev_base_lock); - spin_lock(&iboe->lock); + spin_lock_bh(&iboe->lock); for_each_netdev(&init_net, dev) { u8 port = mlx4_ib_get_dev_port(dev, ibdev); - if (port) + /* port will be non-zero only for ETH ports */ + if (port) { + mlx4_ib_set_default_gid(ibdev, dev, port); mlx4_ib_get_dev_addr(dev, ibdev, port); + } } - spin_unlock(&iboe->lock); + spin_unlock_bh(&iboe->lock); read_unlock(&dev_base_lock); - - return 0; +out: + return err; } static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, @@ -1756,7 +1837,7 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, iboe = &ibdev->iboe; - spin_lock(&iboe->lock); + spin_lock_bh(&iboe->lock); mlx4_foreach_ib_transport_port(port, ibdev->dev) { enum ib_port_state port_state = IB_PORT_NOP; struct net_device *old_master = iboe->masters[port - 1]; @@ -1788,35 +1869,47 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; mlx4_ib_set_default_gid(ibdev, curr_netdev, port); - } else { - reset_gid_table(ibdev, port); - } - /* if using bonding/team and a slave port is down, we don't the bond IP - * based gids in the table since flows that select port by gid may get - * the down port. - */ - if (curr_master && (port_state == IB_PORT_DOWN)) { - reset_gid_table(ibdev, port); - mlx4_ib_set_default_gid(ibdev, curr_netdev, port); - } - /* if bonding is used it is possible that we add it to masters - * only after IP address is assigned to the net bonding - * interface. - */ - if (curr_master && (old_master != curr_master)) { - reset_gid_table(ibdev, port); - mlx4_ib_set_default_gid(ibdev, curr_netdev, port); - mlx4_ib_get_dev_addr(curr_master, ibdev, port); - } + if (curr_master) { + /* if using bonding/team and a slave port is down, we + * don't want the bond IP based gids in the table since + * flows that select port by gid may get the down port. + */ + if (port_state == IB_PORT_DOWN) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, + curr_netdev, + port); + } else { + /* gids from the upper dev (bond/team) + * should appear in port's gid table + */ + mlx4_ib_get_dev_addr(curr_master, + ibdev, port); + } + } + /* if bonding is used it is possible that we add it to + * masters only after IP address is assigned to the + * net bonding interface. 
+ */ + if (curr_master && (old_master != curr_master)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, + curr_netdev, port); + mlx4_ib_get_dev_addr(curr_master, ibdev, port); + } - if (!curr_master && (old_master != curr_master)) { + if (!curr_master && (old_master != curr_master)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, + curr_netdev, port); + mlx4_ib_get_dev_addr(curr_netdev, ibdev, port); + } + } else { reset_gid_table(ibdev, port); - mlx4_ib_set_default_gid(ibdev, curr_netdev, port); - mlx4_ib_get_dev_addr(curr_netdev, ibdev, port); } } - spin_unlock(&iboe->lock); + spin_unlock_bh(&iboe->lock); if (update_qps_port > 0) mlx4_ib_update_qps(ibdev, dev, update_qps_port); @@ -2007,6 +2100,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | @@ -2059,6 +2153,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; + ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; @@ -2156,6 +2251,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_steer_free_bitmap; } + for (j = 1; j <= ibdev->dev->caps.num_ports; j++) + atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]); + if (ib_register_device(&ibdev->ib_dev, NULL)) goto err_steer_free_bitmap; @@ -2192,12 +2290,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) } } #endif - for (i = 1 ; i <= ibdev->num_ports ; ++i) - reset_gid_table(ibdev, i); - rtnl_lock(); - mlx4_ib_scan_netdevs(ibdev, NULL, 0); - rtnl_unlock(); - mlx4_ib_init_gid_table(ibdev); + if (mlx4_ib_init_gid_table(ibdev)) + goto err_notif; } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { @@ -2345,6 +2439,9 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) struct mlx4_ib_dev *ibdev = ibdev_ptr; int p; + ibdev->ib_active = false; + flush_workqueue(wq); + mlx4_ib_close_sriov(ibdev); mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 369da3ca5d64..6eb743f65f6f 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -451,6 +451,7 @@ struct mlx4_ib_iboe { spinlock_t lock; struct net_device *netdevs[MLX4_MAX_PORTS]; struct net_device *masters[MLX4_MAX_PORTS]; + atomic64_t mac[MLX4_MAX_PORTS]; struct notifier_block nb; struct notifier_block nb_inet; struct notifier_block nb_inet6; @@ -788,5 +789,9 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); +int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, + u64 start, u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index cb2a8727f3fb..8f9325cfc85d 100644 --- 
a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -144,8 +144,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); + /* Force registering the memory as writable. */ + /* Used for memory re-registration. HCA protects the access */ mr->umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); + access_flags | IB_ACCESS_LOCAL_WRITE, 0); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; @@ -183,6 +185,93 @@ err_free: return ERR_PTR(err); } +int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, + u64 start, u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(mr->device); + struct mlx4_ib_mr *mmr = to_mmr(mr); + struct mlx4_mpt_entry *mpt_entry; + struct mlx4_mpt_entry **pmpt_entry = &mpt_entry; + int err; + + /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs, + * we assume that the calls can't run concurrently. Otherwise, a + * race exists. + */ + err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); + + if (err) + return err; + + if (flags & IB_MR_REREG_PD) { + err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, + to_mpd(pd)->pdn); + + if (err) + goto release_mpt_entry; + } + + if (flags & IB_MR_REREG_ACCESS) { + err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, + convert_access(mr_access_flags)); + + if (err) + goto release_mpt_entry; + } + + if (flags & IB_MR_REREG_TRANS) { + int shift; + int err; + int n; + + mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); + ib_umem_release(mmr->umem); + mmr->umem = ib_umem_get(mr->uobject->context, start, length, + mr_access_flags | + IB_ACCESS_LOCAL_WRITE, + 0); + if (IS_ERR(mmr->umem)) { + err = PTR_ERR(mmr->umem); + /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */ + mmr->umem = NULL; + goto release_mpt_entry; + } + n = ib_umem_page_count(mmr->umem); + shift = ilog2(mmr->umem->page_size); + + err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr, + virt_addr, length, n, shift, + *pmpt_entry); + if (err) { + ib_umem_release(mmr->umem); + goto release_mpt_entry; + } + mmr->mmr.iova = virt_addr; + mmr->mmr.size = length; + + err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem); + if (err) { + mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); + ib_umem_release(mmr->umem); + goto release_mpt_entry; + } + } + + /* If we couldn't transfer the MR to the HCA, just remember to + * return a failure. But dereg_mr will free the resources.
+ */ + err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry); + if (!err && flags & IB_MR_REREG_ACCESS) + mmr->mmr.access = mr_access_flags; + +release_mpt_entry: + mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); + + return err; +} + int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 67780452f0cf..9c5150c3cb31 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -964,9 +964,10 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) pr_warn("modify QP %06x to RESET failed.\n", qp->mqp.qpn); - if (qp->pri.smac) { + if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) { mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); qp->pri.smac = 0; + qp->pri.smac_port = 0; } if (qp->alt.smac) { mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); @@ -1325,7 +1326,8 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, * If one was already assigned, but the new mac differs, * unregister the old one and register the new one. */ - if (!smac_info->smac || smac_info->smac != smac) { + if ((!smac_info->smac && !smac_info->smac_port) || + smac_info->smac != smac) { /* register candidate now, unreg if needed, after success */ smac_index = mlx4_register_mac(dev->dev, port, smac); if (smac_index >= 0) { @@ -1390,21 +1392,13 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac, struct mlx4_qp_context *context) { - struct net_device *ndev; u64 u64_mac; int smac_index; - - ndev = dev->iboe.netdevs[qp->port - 1]; - if (ndev) { - smac = ndev->dev_addr; - u64_mac = mlx4_mac_to_u64(smac); - } else { - u64_mac = dev->dev->caps.def_mac[qp->port]; - } + u64_mac = atomic64_read(&dev->iboe.mac[qp->port - 1]); context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6); - if (!qp->pri.smac) { + if (!qp->pri.smac && !qp->pri.smac_port) { smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac); if (smac_index >= 0) { qp->pri.candidate_smac_index = smac_index; @@ -1432,6 +1426,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int steer_qp = 0; int err = -EINVAL; + /* APM is not supported under RoCE */ + if (attr_mask & IB_QP_ALT_PATH && + rdma_port_get_link_layer(&dev->ib_dev, qp->port) == + IB_LINK_LAYER_ETHERNET) + return -ENOTSUPP; + context = kzalloc(sizeof *context, GFP_KERNEL); if (!context) return -ENOMEM; @@ -1677,9 +1677,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | MLX4_IB_LINK_TYPE_ETH; + if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + /* set QP to receive both tunneled & non-tunneled packets */ + if (!(context->flags & cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET))) + context->srqn = cpu_to_be32(7 << 28); + } + } if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { int is_eth = rdma_port_get_link_layer( @@ -1780,9 +1786,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_reg(dev, qp, 0); } - if (qp->pri.smac) { + if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) { mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); 
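/*
 * Editor's note, not part of the commit: the guard
 * "qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)" that this
 * series adds around each unregister is logically equivalent to
 * "qp->pri.smac || qp->pri.smac_port".  An all-zero MAC can still be a
 * legitimately registered address, so smac alone cannot mark the entry
 * as registered; smac_port, which is set only after a successful
 * mlx4_register_mac(), serves as the flag.  A minimal sketch of the
 * invariant, with a hypothetical helper name:
 *
 *	static bool pri_smac_registered(const struct mlx4_ib_qp *qp)
 *	{
 *		// registered iff a port was recorded at registration time
 *		return qp->pri.smac || qp->pri.smac_port;
 *	}
 *
 * Teardown must then clear both fields together, as the surrounding
 * hunks do.
 */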
qp->pri.smac = 0; + qp->pri.smac_port = 0; } if (qp->alt.smac) { mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); @@ -1806,11 +1813,12 @@ out: if (err && steer_qp) mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); - if (qp->pri.candidate_smac) { + if (qp->pri.candidate_smac || + (!qp->pri.candidate_smac && qp->pri.candidate_smac_port)) { if (err) { mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac); } else { - if (qp->pri.smac) + if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); qp->pri.smac = qp->pri.candidate_smac; qp->pri.smac_index = qp->pri.candidate_smac_index; @@ -2083,6 +2091,16 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, return 0; } +static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac) +{ + int i; + + for (i = ETH_ALEN; i; i--) { + dst_mac[i - 1] = src_mac & 0xff; + src_mac >>= 8; + } +} + static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) { @@ -2197,7 +2215,6 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, } if (is_eth) { - u8 *smac; struct in6_addr in6; u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; @@ -2210,12 +2227,17 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); memcpy(&in6, sgid.raw, sizeof(in6)); - if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) - smac = to_mdev(sqp->qp.ibqp.device)-> - iboe.netdevs[sqp->qp.port - 1]->dev_addr; - else /* use the src mac of the tunnel */ - smac = ah->av.eth.s_mac; - memcpy(sqp->ud_header.eth.smac_h, smac, 6); + if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]); + u8 smac[ETH_ALEN]; + + mlx4_u64_to_smac(smac, mac); + memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN); + } else { + /* use the src mac of the tunnel */ + memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN); + } + if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); if (!is_vlan) { diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index e4056279166d..10cfce5119a9 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -752,7 +752,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, return ERR_PTR(-EINVAL); entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.max_cqes) + if (entries > dev->mdev->caps.gen.max_cqes) return ERR_PTR(-EINVAL); cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -919,7 +919,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int err; u32 fsel; - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) + if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) return -ENOSYS; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -1074,7 +1074,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int uninitialized_var(cqe_size); unsigned long flags; - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { + if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { pr_info("Firmware does not support resize CQ\n"); return -ENOSYS; } @@ -1083,7 +1083,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -EINVAL; entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.max_cqes + 1) + if 
(entries > dev->mdev->caps.gen.max_cqes + 1) return -EINVAL; if (entries == ibcq->cqe + 1) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index b514bbb5610f..657af9a1167c 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -129,7 +129,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->caps.ext_port_cap[port - 1] = (!err && !packet_error) ? + dev->mdev->caps.gen.ext_port_cap[port - 1] = (!err && !packet_error) ? MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d8907b20522a..1ba6c42e4df8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -157,11 +157,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; + struct mlx5_general_caps *gen; int err = -ENOMEM; int max_rq_sg; int max_sq_sg; u64 flags; + gen = &dev->mdev->caps.gen; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) @@ -183,7 +185,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; - flags = dev->mdev->caps.flags; + flags = gen->flags; if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) @@ -213,30 +215,31 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; - props->page_size_cap = dev->mdev->caps.min_page_sz; - props->max_qp = 1 << dev->mdev->caps.log_max_qp; - props->max_qp_wr = dev->mdev->caps.max_wqes; - max_rq_sg = dev->mdev->caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (dev->mdev->caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / + props->page_size_cap = gen->min_page_sz; + props->max_qp = 1 << gen->log_max_qp; + props->max_qp_wr = gen->max_wqes; + max_rq_sg = gen->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (gen->max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_cq = 1 << dev->mdev->caps.log_max_cq; - props->max_cqe = dev->mdev->caps.max_cqes - 1; - props->max_mr = 1 << dev->mdev->caps.log_max_mkey; - props->max_pd = 1 << dev->mdev->caps.log_max_pd; - props->max_qp_rd_atom = dev->mdev->caps.max_ra_req_qp; - props->max_qp_init_rd_atom = dev->mdev->caps.max_ra_res_qp; + props->max_cq = 1 << gen->log_max_cq; + props->max_cqe = gen->max_cqes - 1; + props->max_mr = 1 << gen->log_max_mkey; + props->max_pd = 1 << gen->log_max_pd; + props->max_qp_rd_atom = 1 << gen->log_max_ra_req_qp; + props->max_qp_init_rd_atom = 1 << gen->log_max_ra_res_qp; + props->max_srq = 1 << gen->log_max_srq; + props->max_srq_wr = gen->max_srq_wqes - 1; + props->local_ca_ack_delay = gen->local_ca_ack_delay; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; - props->max_srq = 1 << dev->mdev->caps.log_max_srq; - props->max_srq_wr = dev->mdev->caps.max_srq_wqes - 1; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->local_ca_ack_delay = dev->mdev->caps.local_ca_ack_delay; + props->local_ca_ack_delay = gen->local_ca_ack_delay; 
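/*
 * Editor's note, not part of the commit: this series moves the mlx5
 * capability fields under dev->mdev->caps.gen, and several limits are
 * stored log2-encoded there.  A minimal sketch of the access pattern
 * used throughout the converted hunks (field names as in the diff
 * above):
 *
 *	struct mlx5_general_caps *gen = &dev->mdev->caps.gen;
 *	int max_qp      = 1 << gen->log_max_qp;        // log2-encoded
 *	int max_qp_rd   = 1 << gen->log_max_ra_req_qp; // was caps.max_ra_req_qp
 *	int max_cqe     = gen->max_cqes - 1;           // plain value
 *
 * Caching the gen pointer once per function keeps the long
 * dev->mdev->caps.gen chains out of every expression.
 */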
props->atomic_cap = IB_ATOMIC_NONE; props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); - props->max_mcast_grp = 1 << dev->mdev->caps.log_max_mcg; - props->max_mcast_qp_attach = dev->mdev->caps.max_qp_mcg; + props->max_mcast_grp = 1 << gen->log_max_mcg; + props->max_mcast_qp_attach = gen->max_qp_mcg; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ @@ -254,10 +257,12 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; + struct mlx5_general_caps *gen; int ext_active_speed; int err = -ENOMEM; - if (port < 1 || port > dev->mdev->caps.num_ports) { + gen = &dev->mdev->caps.gen; + if (port < 1 || port > gen->num_ports) { mlx5_ib_warn(dev, "invalid port number %d\n", port); return -EINVAL; } @@ -288,8 +293,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; - props->max_msg_sz = 1 << to_mdev(ibdev)->mdev->caps.log_max_msg; - props->pkey_tbl_len = to_mdev(ibdev)->mdev->caps.port[port - 1].pkey_table_len; + props->max_msg_sz = 1 << gen->log_max_msg; + props->pkey_tbl_len = gen->port[port - 1].pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -316,7 +321,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { - if (dev->mdev->caps.ext_port_cap[port - 1] & + if (gen->ext_port_cap[port - 1] & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; @@ -470,6 +475,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_ib_alloc_ucontext_req_v2 req; struct mlx5_ib_alloc_ucontext_resp resp; struct mlx5_ib_ucontext *context; + struct mlx5_general_caps *gen; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; int gross_uuars; @@ -480,6 +486,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int i; size_t reqlen; + gen = &dev->mdev->caps.gen; if (!dev->ib_active) return ERR_PTR(-EAGAIN); @@ -512,14 +519,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; - resp.qp_tab_size = 1 << dev->mdev->caps.log_max_qp; - resp.bf_reg_size = dev->mdev->caps.bf_reg_size; + resp.qp_tab_size = 1 << gen->log_max_qp; + resp.bf_reg_size = gen->bf_reg_size; resp.cache_line_size = L1_CACHE_BYTES; - resp.max_sq_desc_sz = dev->mdev->caps.max_sq_desc_sz; - resp.max_rq_desc_sz = dev->mdev->caps.max_rq_desc_sz; - resp.max_send_wqebb = dev->mdev->caps.max_wqes; - resp.max_recv_wr = dev->mdev->caps.max_wqes; - resp.max_srq_recv_wr = dev->mdev->caps.max_srq_wqes; + resp.max_sq_desc_sz = gen->max_sq_desc_sz; + resp.max_rq_desc_sz = gen->max_rq_desc_sz; + resp.max_send_wqebb = gen->max_wqes; + resp.max_recv_wr = gen->max_wqes; + resp.max_srq_recv_wr = gen->max_srq_wqes; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -565,7 +572,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct 
ib_device *ibdev, mutex_init(&context->db_page_mutex); resp.tot_uuars = req.total_num_uuars; - resp.num_ports = dev->mdev->caps.num_ports; + resp.num_ports = gen->num_ports; err = ib_copy_to_udata(udata, &resp, sizeof(resp) - sizeof(resp.reserved)); if (err) @@ -650,13 +657,13 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return -EINVAL; idx = get_index(vma->vm_pgoff); + if (idx >= uuari->num_uars) + return -EINVAL; + pfn = uar_index2pfn(dev, uuari->uars[idx].index); mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, (unsigned long long)pfn); - if (idx >= uuari->num_uars) - return -EINVAL; - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot)) @@ -967,9 +974,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, static void get_ext_port_caps(struct mlx5_ib_dev *dev) { + struct mlx5_general_caps *gen; int port; - for (port = 1; port <= dev->mdev->caps.num_ports; port++) + gen = &dev->mdev->caps.gen; + for (port = 1; port <= gen->num_ports; port++) mlx5_query_ext_port_caps(dev, port); } @@ -977,9 +986,11 @@ static int get_port_caps(struct mlx5_ib_dev *dev) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; + struct mlx5_general_caps *gen; int err = 0; int port; + gen = &dev->mdev->caps.gen; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) goto out; @@ -994,14 +1005,14 @@ static int get_port_caps(struct mlx5_ib_dev *dev) goto out; } - for (port = 1; port <= dev->mdev->caps.num_ports; port++) { + for (port = 1; port <= gen->num_ports; port++) { err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); break; } - dev->mdev->caps.port[port - 1].pkey_table_len = dprops->max_pkeys; - dev->mdev->caps.port[port - 1].gid_table_len = pprops->gid_tbl_len; + gen->port[port - 1].pkey_table_len = dprops->max_pkeys; + gen->port[port - 1].gid_table_len = pprops->gid_tbl_len; mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", dprops->max_pkeys, pprops->gid_tbl_len); } @@ -1279,8 +1290,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey; - dev->num_ports = mdev->caps.num_ports; + dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey; + dev->num_ports = mdev->caps.gen.num_ports; dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = dev->num_comp_vectors; dev->ib_dev.dma_device = &mdev->pdev->dev; @@ -1355,7 +1366,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; - if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { + if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= @@ -1414,8 +1425,8 @@ err_dealloc: static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) { struct mlx5_ib_dev *dev = context; - destroy_umrc_res(dev); ib_unregister_device(&dev->ib_dev); + destroy_umrc_res(dev); destroy_dev_resources(&dev->devr); free_comp_eqs(dev); ib_dealloc_device(&dev->ib_dev); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index a3e81444c825..dae07eae9507 100644 
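/*
 * Editor's note, not part of the commit: the mem.c hunks below stop
 * assuming the system PAGE_SHIFT and instead derive the shift from the
 * umem's own page size, so page counting stays correct for umems whose
 * page size differs from the CPU page size.  A minimal sketch of the
 * idiom:
 *
 *	unsigned long page_shift = ilog2(umem->page_size);
 *	len = sg_dma_len(sg) >> page_shift;      // pages in this SG entry
 *	pfn = sg_dma_address(sg) >> page_shift;  // first page frame
 *
 * ilog2() is exact here because umem->page_size is a power of two.
 */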
--- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -55,16 +55,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, u64 pfn; struct scatterlist *sg; int entry; + unsigned long page_shift = ilog2(umem->page_size); - addr = addr >> PAGE_SHIFT; + addr = addr >> page_shift; tmp = (unsigned long)addr; m = find_first_bit(&tmp, sizeof(tmp)); skip = 1 << m; mask = skip - 1; i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - pfn = sg_dma_address(sg) >> PAGE_SHIFT; + len = sg_dma_len(sg) >> page_shift; + pfn = sg_dma_address(sg) >> page_shift; for (k = 0; k < len; k++) { if (!(i & mask)) { tmp = (unsigned long)pfn; @@ -103,14 +104,15 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, *ncont = 0; } - *shift = PAGE_SHIFT + m; + *shift = page_shift + m; *count = i; } void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, __be64 *pas, int umr) { - int shift = page_shift - PAGE_SHIFT; + unsigned long umem_page_shift = ilog2(umem->page_size); + int shift = page_shift - umem_page_shift; int mask = (1 << shift) - 1; int i, k; u64 cur = 0; @@ -121,11 +123,11 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; + len = sg_dma_len(sg) >> umem_page_shift; base = sg_dma_address(sg); for (k = 0; k < len; k++) { if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); + cur = base + (k << umem_page_shift); if (umr) cur |= 3; @@ -134,7 +136,7 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, i >> shift, be64_to_cpu(pas[i >> shift])); } else mlx5_ib_dbg(dev, "=====> 0x%llx\n", - base + (k << PAGE_SHIFT)); + base + (k << umem_page_shift)); i++; } } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 80b3c63eab5d..8ee7cb46e059 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -881,12 +881,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int order; int err; - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n", - start, virt_addr, length); + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, virt_addr, length, access_flags); umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); if (IS_ERR(umem)) { - mlx5_ib_dbg(dev, "umem get failed\n"); + mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); return (void *)umem; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7efe6e3f3542..e261a53f9a02 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -158,11 +158,13 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { + struct mlx5_general_caps *gen; int wqe_size; int wq_size; + gen = &dev->mdev->caps.gen; /* Sanity check RQ size before proceeding */ - if (cap->max_recv_wr > dev->mdev->caps.max_wqes) + if (cap->max_recv_wr > gen->max_wqes) return -EINVAL; if (!has_rq) { @@ -182,10 +184,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); qp->rq.wqe_cnt = wq_size / wqe_size; - 
if (wqe_size > dev->mdev->caps.max_rq_desc_sz) { + if (wqe_size > gen->max_rq_desc_sz) { mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", wqe_size, - dev->mdev->caps.max_rq_desc_sz); + gen->max_rq_desc_sz); return -EINVAL; } qp->rq.wqe_shift = ilog2(wqe_size); @@ -266,9 +268,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { + struct mlx5_general_caps *gen; int wqe_size; int wq_size; + gen = &dev->mdev->caps.gen; if (!attr->cap.max_send_wr) return 0; @@ -277,9 +281,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, if (wqe_size < 0) return wqe_size; - if (wqe_size > dev->mdev->caps.max_sq_desc_sz) { + if (wqe_size > gen->max_sq_desc_sz) { mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", - wqe_size, dev->mdev->caps.max_sq_desc_sz); + wqe_size, gen->max_sq_desc_sz); return -EINVAL; } @@ -292,9 +296,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; - if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) { + if (qp->sq.wqe_cnt > gen->max_wqes) { mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", - qp->sq.wqe_cnt, dev->mdev->caps.max_wqes); + qp->sq.wqe_cnt, gen->max_wqes); return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); @@ -309,11 +313,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { + struct mlx5_general_caps *gen; int desc_sz = 1 << qp->sq.wqe_shift; - if (desc_sz > dev->mdev->caps.max_sq_desc_sz) { + gen = &dev->mdev->caps.gen; + if (desc_sz > gen->max_sq_desc_sz) { mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", - desc_sz, dev->mdev->caps.max_sq_desc_sz); + desc_sz, gen->max_sq_desc_sz); return -EINVAL; } @@ -325,9 +331,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, qp->sq.wqe_cnt = ucmd->sq_wqe_count; - if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) { + if (qp->sq.wqe_cnt > gen->max_wqes) { mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", - qp->sq.wqe_cnt, dev->mdev->caps.max_wqes); + qp->sq.wqe_cnt, gen->max_wqes); return -EINVAL; } @@ -803,16 +809,18 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_resources *devr = &dev->devr; struct mlx5_ib_create_qp_resp resp; struct mlx5_create_qp_mbox_in *in; + struct mlx5_general_caps *gen; struct mlx5_ib_create_qp ucmd; int inlen = sizeof(*in); int err; + gen = &dev->mdev->caps.gen; mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { + if (!(gen->flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); return -EINVAL; } else { @@ -851,9 +859,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_dbg(dev, "invalid rq params\n"); return -EINVAL; } - if (ucmd.sq_wqe_count > dev->mdev->caps.max_wqes) { + if (ucmd.sq_wqe_count > gen->max_wqes) { mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd.sq_wqe_count, dev->mdev->caps.max_wqes); + ucmd.sq_wqe_count, gen->max_wqes); return -EINVAL; } err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); @@ -1144,6 +1152,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct 
ib_udata *udata) { + struct mlx5_general_caps *gen; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; u16 xrcdn = 0; @@ -1161,11 +1170,12 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, } dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); } + gen = &dev->mdev->caps.gen; switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: case IB_QPT_XRC_INI: - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC)) { + if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) { mlx5_ib_dbg(dev, "XRC not supported\n"); return ERR_PTR(-ENOSYS); } @@ -1272,6 +1282,9 @@ enum { static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { + struct mlx5_general_caps *gen; + + gen = &dev->mdev->caps.gen; if (rate == IB_RATE_PORT_CURRENT) { return 0; } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { @@ -1279,7 +1292,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) } else { while (rate != IB_RATE_2_5_GBPS && !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - dev->mdev->caps.stat_rate_support)) + gen->stat_rate_support)) --rate; } @@ -1290,8 +1303,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, u32 path_flags, const struct ib_qp_attr *attr) { + struct mlx5_general_caps *gen; int err; + gen = &dev->mdev->caps.gen; path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; @@ -1302,6 +1317,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->rlid = cpu_to_be16(ah->dlid); if (ah->ah_flags & IB_AH_GRH) { + if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) { + pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n", + ah->grh.sgid_index, gen->port[port - 1].gid_table_len); + return -EINVAL; + } path->grh_mlid |= 1 << 7; path->mgid_index = ah->grh.sgid_index; path->hop_limit = ah->grh.hop_limit; @@ -1317,22 +1337,6 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->static_rate = err; path->port = port; - if (ah->ah_flags & IB_AH_GRH) { - if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) { - pr_err(KERN_ERR "sgid_index (%u) too large. 
max is %d\n", - ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len); - return -EINVAL; - } - - path->grh_mlid |= 1 << 7; - path->mgid_index = ah->grh.sgid_index; - path->hop_limit = ah->grh.hop_limit; - path->tclass_flowlabel = - cpu_to_be32((ah->grh.traffic_class << 20) | - (ah->grh.flow_label)); - memcpy(path->rgid, ah->grh.dgid.raw, 16); - } - if (attr_mask & IB_QP_TIMEOUT) path->ackto_lt = attr->timeout << 3; @@ -1492,6 +1496,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; + struct mlx5_general_caps *gen; struct mlx5_modify_qp_mbox_in *in; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; @@ -1500,6 +1505,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int mlx5_st; int err; + gen = &dev->mdev->caps.gen; in = kzalloc(sizeof(*in), GFP_KERNEL); if (!in) return -ENOMEM; @@ -1539,7 +1545,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = -EINVAL; goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev->caps.log_max_msg; + context->mtu_msgmax = (attr->path_mtu << 5) | gen->log_max_msg; } if (attr_mask & IB_QP_DEST_QPN) @@ -1685,9 +1691,11 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; + struct mlx5_general_caps *gen; int err = -EINVAL; int port; + gen = &dev->mdev->caps.gen; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -1699,21 +1707,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > dev->mdev->caps.num_ports)) + (attr->port_num == 0 || attr->port_num > gen->num_ports)) goto out; if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= dev->mdev->caps.port[port - 1].pkey_table_len) + if (attr->pkey_index >= gen->port[port - 1].pkey_table_len) goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > dev->mdev->caps.max_ra_res_qp) + attr->max_rd_atomic > (1 << gen->log_max_ra_res_qp)) goto out; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > dev->mdev->caps.max_ra_req_qp) + attr->max_dest_rd_atomic > (1 << gen->log_max_ra_req_qp)) goto out; if (cur_state == new_state && cur_state == IB_QPS_RESET) { @@ -2020,56 +2028,31 @@ static u8 bs_selector(int block_size) } } -static int format_selector(struct ib_sig_attrs *attr, - struct ib_sig_domain *domain, - int *selector) +static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain, + struct mlx5_bsf_inl *inl) { + /* Valid inline section and allow BSF refresh */ + inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | + MLX5_BSF_REFRESH_DIF); + inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); + inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); + /* repeating block */ + inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; + inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? 
+ MLX5_DIF_CRC : MLX5_DIF_IPCS; -#define FORMAT_DIF_NONE 0 -#define FORMAT_DIF_CRC_INC 8 -#define FORMAT_DIF_CRC_NO_INC 12 -#define FORMAT_DIF_CSUM_INC 13 -#define FORMAT_DIF_CSUM_NO_INC 14 + if (domain->sig.dif.ref_remap) + inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG; - switch (domain->sig.dif.type) { - case IB_T10DIF_NONE: - /* No DIF */ - *selector = FORMAT_DIF_NONE; - break; - case IB_T10DIF_TYPE1: /* Fall through */ - case IB_T10DIF_TYPE2: - switch (domain->sig.dif.bg_type) { - case IB_T10DIF_CRC: - *selector = FORMAT_DIF_CRC_INC; - break; - case IB_T10DIF_CSUM: - *selector = FORMAT_DIF_CSUM_INC; - break; - default: - return 1; - } - break; - case IB_T10DIF_TYPE3: - switch (domain->sig.dif.bg_type) { - case IB_T10DIF_CRC: - *selector = domain->sig.dif.type3_inc_reftag ? - FORMAT_DIF_CRC_INC : - FORMAT_DIF_CRC_NO_INC; - break; - case IB_T10DIF_CSUM: - *selector = domain->sig.dif.type3_inc_reftag ? - FORMAT_DIF_CSUM_INC : - FORMAT_DIF_CSUM_NO_INC; - break; - default: - return 1; - } - break; - default: - return 1; + if (domain->sig.dif.app_escape) { + if (domain->sig.dif.ref_escape) + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE; + else + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE; } - return 0; + inl->dif_app_bitmask_check = + cpu_to_be16(domain->sig.dif.apptag_check_mask); } static int mlx5_set_bsf(struct ib_mr *sig_mr, @@ -2080,45 +2063,49 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, struct mlx5_bsf_basic *basic = &bsf->basic; struct ib_sig_domain *mem = &sig_attrs->mem; struct ib_sig_domain *wire = &sig_attrs->wire; - int ret, selector; memset(bsf, 0, sizeof(*bsf)); + + /* Basic + Extended + Inline */ + basic->bsf_size_sbs = 1 << 7; + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + basic->raw_data_size = cpu_to_be32(data_size); + + /* Memory domain */ switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_NONE: + break; case IB_SIG_TYPE_T10_DIF: - if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) - return -EINVAL; + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); + mlx5_fill_inl_bsf(mem, &bsf->m_inl); + break; + default: + return -EINVAL; + } - /* Input domain check byte mask */ - basic->check_byte_mask = sig_attrs->check_mask; + /* Wire domain */ + switch (sig_attrs->wire.sig_type) { + case IB_SIG_TYPE_NONE: + break; + case IB_SIG_TYPE_T10_DIF: if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && - mem->sig.dif.type == wire->sig.dif.type) { + mem->sig_type == wire->sig_type) { /* Same block structure */ - basic->bsf_size_sbs = 1 << 4; + basic->bsf_size_sbs |= 1 << 4; if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) - basic->wire.copy_byte_mask |= 0xc0; + basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) - basic->wire.copy_byte_mask |= 0x30; + basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK; if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) - basic->wire.copy_byte_mask |= 0x0f; + basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK; } else basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); - basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); - basic->raw_data_size = cpu_to_be32(data_size); - - ret = format_selector(sig_attrs, mem, &selector); - if (ret) - return -EINVAL; - basic->m_bfs_psv = cpu_to_be32(selector << 24 | - msig->psv_memory.psv_idx); - - ret = format_selector(sig_attrs, wire, &selector); - if (ret) - return -EINVAL; - basic->w_bfs_psv = 
cpu_to_be32(selector << 24 | - msig->psv_wire.psv_idx); + basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); + mlx5_fill_inl_bsf(wire, &bsf->w_inl); break; - default: return -EINVAL; } @@ -2317,20 +2304,21 @@ static int set_psv_wr(struct ib_sig_domain *domain, memset(psv_seg, 0, sizeof(*psv_seg)); psv_seg->psv_num = cpu_to_be32(psv_idx); switch (domain->sig_type) { + case IB_SIG_TYPE_NONE: + break; case IB_SIG_TYPE_T10_DIF: psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | domain->sig.dif.app_tag); psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); - - *seg += sizeof(*psv_seg); - *size += sizeof(*psv_seg) / 16; break; - default: pr_err("Bad signature type given.\n"); return 1; } + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + return 0; } @@ -2501,7 +2489,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); for (nreq = 0; wr; nreq++, wr = wr->next) { - if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) { + if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { mlx5_ib_warn(dev, "\n"); err = -EINVAL; *bad_wr = wr; @@ -2893,7 +2881,8 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at memset(ib_ah_attr, 0, sizeof(*ib_ah_attr)); ib_ah_attr->port_num = path->port; - if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) + if (ib_ah_attr->port_num == 0 || + ib_ah_attr->port_num > dev->caps.gen.num_ports) return; ib_ah_attr->sl = path->sl & 0xf; @@ -3011,10 +3000,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_general_caps *gen; struct mlx5_ib_xrcd *xrcd; int err; - if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC)) + gen = &dev->mdev->caps.gen; + if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 70bd131ba646..97cc1baaa8e3 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -238,6 +238,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_general_caps *gen; struct mlx5_ib_srq *srq; int desc_size; int buf_size; @@ -247,11 +248,12 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, int is_xrc; u32 flgs, xrcdn; + gen = &dev->mdev->caps.gen; /* Sanity check SRQ size before proceeding */ - if (init_attr->attr.max_wr >= dev->mdev->caps.max_srq_wqes) { + if (init_attr->attr.max_wr >= gen->max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, - dev->mdev->caps.max_srq_wqes); + gen->max_srq_wqes); return ERR_PTR(-EINVAL); } diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index b6f7f457fc55..8881fa376e06 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -294,7 +294,7 @@ int mthca_create_agents(struct mthca_dev *dev) agent = ib_register_mad_agent(&dev->ib_dev, p + 1, q ? 
IB_QPT_GSI : IB_QPT_SMI, NULL, 0, send_handler, - NULL, NULL); + NULL, NULL, 0); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto err; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 90200245c5eb..02120d340d50 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1003,13 +1003,13 @@ int nes_init_cqp(struct nes_device *nesdev) (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) + sizeof(struct nes_hw_cqp_qp_context); - nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size, - &nesdev->cqp_pbase); + nesdev->cqp_vbase = pci_zalloc_consistent(nesdev->pcidev, + nesdev->cqp_mem_size, + &nesdev->cqp_pbase); if (!nesdev->cqp_vbase) { nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n"); return -ENOMEM; } - memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size); /* Allocate a twice the number of CQP requests as the SQ size */ nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) * @@ -1691,13 +1691,13 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) + sizeof(struct nes_hw_nic_qp_context); - nesvnic->nic_vbase = pci_alloc_consistent(nesdev->pcidev, nesvnic->nic_mem_size, - &nesvnic->nic_pbase); + nesvnic->nic_vbase = pci_zalloc_consistent(nesdev->pcidev, + nesvnic->nic_mem_size, + &nesvnic->nic_pbase); if (!nesvnic->nic_vbase) { nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n"); return -ENOMEM; } - memset(nesvnic->nic_vbase, 0, nesvnic->nic_mem_size); nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n", nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 218dd3574285..fef067c959fc 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1616,8 +1616,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, entries, nescq->cq_mem_size, nescq->hw_cq.cq_number); /* allocate the physical buffer space */ - mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size, - &nescq->hw_cq.cq_pbase); + mem = pci_zalloc_consistent(nesdev->pcidev, nescq->cq_mem_size, + &nescq->hw_cq.cq_pbase); if (!mem) { printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n"); nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); @@ -1625,7 +1625,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, return ERR_PTR(-ENOMEM); } - memset(mem, 0, nescq->cq_mem_size); nescq->hw_cq.cq_vbase = mem; nescq->hw_cq.cq_head = 0; nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n", diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 19011dbb930f..b43456ae124b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -40,7 +40,7 @@ #include <be_roce.h> #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u" +#define OCRDMA_ROCE_DRV_VERSION "10.2.287.0u" #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" @@ -137,6 +137,7 @@ struct mqe_ctx { u16 cqe_status; u16 ext_status; bool cmd_done; + bool fw_error_state; }; struct ocrdma_hw_mr { @@ -235,7 +236,10 @@ struct ocrdma_dev { struct list_head entry; struct rcu_head rcu; int id; - u64 
stag_arr[OCRDMA_MAX_STAG]; + u64 *stag_arr; + u8 sl; /* service level */ + bool pfc_state; + atomic_t update_sl; u16 pvid; u32 asic_id; @@ -518,4 +522,22 @@ static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev) OCRDMA_SLI_ASIC_GEN_NUM_SHIFT; } +static inline u8 ocrdma_get_pfc_prio(u8 *pfc, u8 prio) +{ + return *(pfc + prio); +} + +static inline u8 ocrdma_get_app_prio(u8 *app_prio, u8 prio) +{ + return *(app_prio + prio); +} + +static inline u8 ocrdma_is_enabled_and_synced(u32 state) +{ /* May also be used to interpret TC-state, QCN-state + * Appl-state and Logical-link-state in future. + */ + return (state & OCRDMA_STATE_FLAG_ENABLED) && + (state & OCRDMA_STATE_FLAG_SYNC); +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index d4cc01f10c01..ac02ce4e8040 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -35,8 +35,10 @@ #include "ocrdma_ah.h" #include "ocrdma_hw.h" +#define OCRDMA_VID_PCP_SHIFT 0xD + static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, - struct ib_ah_attr *attr, int pdid) + struct ib_ah_attr *attr, union ib_gid *sgid, int pdid) { int status = 0; u16 vlan_tag; bool vlan_enabled = false; @@ -47,15 +49,14 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, memset(ð, 0, sizeof(eth)); memset(&grh, 0, sizeof(grh)); - ah->sgid_index = attr->grh.sgid_index; - + /* VLAN */ vlan_tag = attr->vlan_id; if (!vlan_tag || (vlan_tag > 0xFFF)) vlan_tag = dev->pvid; if (vlan_tag && (vlan_tag < 0x1000)) { eth.eth_type = cpu_to_be16(0x8100); eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); - vlan_tag |= (attr->sl & 7) << 13; + vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT; eth.vlan_tag = cpu_to_be16(vlan_tag); eth_sz = sizeof(struct ocrdma_eth_vlan); vlan_enabled = true; @@ -63,15 +64,14 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); eth_sz = sizeof(struct ocrdma_eth_basic); } + /* MAC */ memcpy(ð.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN); - memcpy(ð.dmac[0], attr->dmac, ETH_ALEN); status = ocrdma_resolve_dmac(dev, attr, ð.dmac[0]); if (status) return status; - status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, - (union ib_gid *)&grh.sgid[0]); - if (status) - return status; + ah->sgid_index = attr->grh.sgid_index; + memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid)); + memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw)); grh.tclass_flow = cpu_to_be32((6 << 28) | (attr->grh.traffic_class << 24) | @@ -79,8 +79,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, /* 0x1b is next header value in GRH */ grh.pdid_hoplimit = cpu_to_be32((pdid << 16) | (0x1b << 8) | attr->grh.hop_limit); - - memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw)); + /* Eth HDR */ memcpy(&ah->av->eth_hdr, ð, eth_sz); memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); if (vlan_enabled) @@ -96,10 +95,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) struct ocrdma_ah *ah; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + union ib_gid sgid; + u8 zmac[ETH_ALEN]; if (!(attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); + if (atomic_cmpxchg(&dev->update_sl, 1, 0)) + ocrdma_init_service_level(dev); ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); @@ -107,7 +110,27 @@ 
struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) status = ocrdma_alloc_av(dev, ah); if (status) goto av_err; - status = set_av_attr(dev, ah, attr, pd->id); + + status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid); + if (status) { + pr_err("%s(): Failed to query sgid, status = %d\n", + __func__, status); + goto av_conf_err; + } + + memset(&zmac, 0, ETH_ALEN); + if (pd->uctx && + memcmp(attr->dmac, &zmac, ETH_ALEN)) { + status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid, + attr->dmac, &attr->vlan_id); + if (status) { + pr_err("%s(): Failed to resolve dmac from gid." + "status = %d\n", __func__, status); + goto av_conf_err; + } + } + + status = set_av_attr(dev, ah, attr, &sgid, pd->id); if (status) goto av_conf_err; @@ -141,7 +164,7 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) struct ocrdma_av *av = ah->av; struct ocrdma_grh *grh; attr->ah_flags |= IB_AH_GRH; - if (ah->av->valid & Bit(1)) { + if (ah->av->valid & OCRDMA_AV_VALID) { grh = (struct ocrdma_grh *)((u8 *)ah->av + sizeof(struct ocrdma_eth_vlan)); attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 3bbf2010a821..638bff1ffc6c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -348,11 +348,6 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len) return mqe; } -static void *ocrdma_alloc_mqe(void) -{ - return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); -} - static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q) { dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma); @@ -525,7 +520,7 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev, cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; cmd->eqn = eq->id; - cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe); + cmd->pdid_cqecnt = cq->size / sizeof(struct ocrdma_mcqe); ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE, cq->dma, PAGE_SIZE_4K); @@ -566,8 +561,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT); cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; - cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE); - cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE); + cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE); + cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE); cmd->async_cqid_ringsize = cq->id; cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << @@ -661,7 +656,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, { struct ocrdma_qp *qp = NULL; struct ocrdma_cq *cq = NULL; - struct ib_event ib_evt = { 0 }; + struct ib_event ib_evt; int cq_event = 0; int qp_event = 1; int srq_event = 0; @@ -674,6 +669,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK]; + memset(&ib_evt, 0, sizeof(ib_evt)); + ib_evt.device = &dev->ibdev; switch (type) { @@ -771,6 +768,10 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, OCRDMA_AE_PVID_MCQE_TAG_MASK) >> OCRDMA_AE_PVID_MCQE_TAG_SHIFT); break; + + case OCRDMA_ASYNC_EVENT_COS_VALUE: + atomic_set(&dev->update_sl, 1); + break; default: /* Not interested evts. 
*/ break; @@ -962,8 +963,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev) msecs_to_jiffies(30000)); if (status) return 0; - else + else { + dev->mqe_ctx.fw_error_state = true; + pr_err("%s(%d) mailbox timeout: fw not responding\n", + __func__, dev->id); return -1; + } } /* issue a mailbox command on the MQ */ @@ -975,6 +980,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) struct ocrdma_mbx_rsp *rsp = NULL; mutex_lock(&dev->mqe_ctx.lock); + if (dev->mqe_ctx.fw_error_state) + goto mbx_err; ocrdma_post_mqe(dev, mqe); status = ocrdma_wait_mqe_cmpl(dev); if (status) @@ -1078,7 +1085,8 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT; attr->max_mw = rsp->max_mw; attr->max_mr = rsp->max_mr; - attr->max_mr_size = ~0ull; + attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) | + rsp->max_mr_size_lo; attr->max_fmr = 0; attr->max_pages_per_frmr = rsp->max_pages_per_frmr; attr->max_num_mr_pbl = rsp->max_num_mr_pbl; @@ -1176,10 +1184,10 @@ int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset) { struct ocrdma_rdma_stats_req *req = dev->stats_mem.va; struct ocrdma_mqe *mqe = &dev->stats_mem.mqe; - struct ocrdma_rdma_stats_resp *old_stats = NULL; + struct ocrdma_rdma_stats_resp *old_stats; int status; - old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL); + old_stats = kmalloc(sizeof(*old_stats), GFP_KERNEL); if (old_stats == NULL) return -ENOMEM; @@ -1222,10 +1230,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp; struct mgmt_hba_attribs *hba_attribs; - mqe = ocrdma_alloc_mqe(); + mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); if (!mqe) return status; - memset(mqe, 0, sizeof(*mqe)); dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp); dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev, @@ -1252,7 +1259,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va; hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs; - dev->hba_port_num = hba_attribs->phy_port; + dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv & + OCRDMA_HBA_ATTRB_PTNUM_MASK) + >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT; strncpy(dev->model_number, hba_attribs->controller_model_number, 31); } @@ -1302,7 +1311,8 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) goto mbx_err; rsp = (struct ocrdma_get_link_speed_rsp *)cmd; - *lnk_speed = rsp->phys_port_speed; + *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK) + >> OCRDMA_PHY_PS_SHIFT; mbx_err: kfree(cmd); @@ -1328,11 +1338,16 @@ static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev) goto mbx_err; rsp = (struct ocrdma_get_phy_info_rsp *)cmd; - dev->phy.phy_type = le16_to_cpu(rsp->phy_type); + dev->phy.phy_type = + (rsp->ityp_ptyp & OCRDMA_PHY_TYPE_MASK); + dev->phy.interface_type = + (rsp->ityp_ptyp & OCRDMA_IF_TYPE_MASK) + >> OCRDMA_IF_TYPE_SHIFT; dev->phy.auto_speeds_supported = - le16_to_cpu(rsp->auto_speeds_supported); + (rsp->fspeed_aspeed & OCRDMA_ASPEED_SUPP_MASK); dev->phy.fixed_speeds_supported = - le16_to_cpu(rsp->fixed_speeds_supported); + (rsp->fspeed_aspeed & OCRDMA_FSPEED_SUPP_MASK) + >> OCRDMA_FSPEED_SUPP_SHIFT; mbx_err: kfree(cmd); return status; @@ -1457,8 +1472,8 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev) pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va; for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) { - pbes[i].pa_lo = (u32) (pa & 0xffffffff); - pbes[i].pa_hi = 
(u32) upper_32_bits(pa); + pbes[i].pa_lo = (u32)cpu_to_le32(pa & 0xffffffff); + pbes[i].pa_hi = (u32)cpu_to_le32(upper_32_bits(pa)); pa += PAGE_SIZE; } cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF); @@ -1501,6 +1516,7 @@ static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev) ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va, dev->av_tbl.pa); + dev->av_tbl.va = NULL; dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va, dev->av_tbl.pbl.pa); kfree(cmd); @@ -1624,14 +1640,16 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP << OCRDMA_CREATE_CQ_TYPE_SHIFT; cq->phase_change = false; - cmd->cmd.cqe_count = (cq->len / cqe_size); + cmd->cmd.pdid_cqecnt = (cq->len / cqe_size); } else { - cmd->cmd.cqe_count = (cq->len / cqe_size) - 1; + cmd->cmd.pdid_cqecnt = (cq->len / cqe_size) - 1; cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID; cq->phase_change = true; } - cmd->cmd.pd_id = pd_id; /* valid only for v3 */ + /* pd_id valid only for v3 */ + cmd->cmd.pdid_cqecnt |= (pd_id << + OCRDMA_CREATE_CQ_CMD_PDID_SHIFT); ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size); status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); if (status) @@ -2206,7 +2224,8 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs, OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK; qp->rq_cq = cq; - if (pd->dpp_enabled && pd->num_dpp_qp) { + if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp && + (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) { ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq, dpp_cq_id); } @@ -2254,7 +2273,8 @@ mbx_err: static int ocrdma_set_av_params(struct ocrdma_qp *qp, struct ocrdma_modify_qp *cmd, - struct ib_qp_attr *attrs) + struct ib_qp_attr *attrs, + int attr_mask) { int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; @@ -2264,6 +2284,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, if ((ah_attr->ah_flags & IB_AH_GRH) == 0) return -EINVAL; + if (atomic_cmpxchg(&qp->dev->update_sl, 1, 0)) + ocrdma_init_service_level(qp->dev); cmd->params.tclass_sq_psn |= (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT); cmd->params.rnt_rc_sl_fl |= @@ -2292,11 +2314,13 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); - vlan_id = ah_attr->vlan_id; - if (vlan_id && (vlan_id < 0x1000)) { + if (attr_mask & IB_QP_VID) { + vlan_id = attrs->vlan_id; cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; + cmd->params.rnt_rc_sl_fl |= + (qp->dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT; } return 0; } @@ -2318,7 +2342,7 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID; } if (attr_mask & IB_QP_AV) { - status = ocrdma_set_av_params(qp, cmd, attrs); + status = ocrdma_set_av_params(qp, cmd, attrs, attr_mask); if (status) return status; } else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) { @@ -2604,6 +2628,168 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) return status; } +static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, + struct ocrdma_dcbx_cfg *dcbxcfg) +{ + int status = 0; + dma_addr_t pa; + 
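The create-CQ hunks just above fold the former `u16 cqe_count; u16 pd_id;` pair into a single packed `u32 pdid_cqecnt`, OR-ing the PD id in at `OCRDMA_CREATE_CQ_CMD_PDID_SHIFT` (0x10, per the ocrdma_sli.h hunk further down). A minimal standalone sketch of that pack/unpack arithmetic; the function and macro names below are invented for the example, only the 16-bit split comes from the patch.

```c
#include <stdint.h>
#include <stdio.h>

#define CREATE_CQ_CMD_PDID_SHIFT 16      /* the patch's OCRDMA_CREATE_CQ_CMD_PDID_SHIFT (0x10) */
#define CREATE_CQ_CQECNT_MASK    0xFFFFu

static uint32_t pack_pdid_cqecnt(uint16_t pd_id, uint16_t cqe_count)
{
	/* low 16 bits carry the CQE count, high 16 bits the PD id */
	return ((uint32_t)pd_id << CREATE_CQ_CMD_PDID_SHIFT) |
	       (cqe_count & CREATE_CQ_CQECNT_MASK);
}

int main(void)
{
	uint32_t dword = pack_pdid_cqecnt(5, 1023);

	printf("pdid_cqecnt = 0x%08x\n", (unsigned)dword);  /* 0x000503ff */
	printf("pd_id=%u cqe_count=%u\n",
	       (unsigned)(dword >> CREATE_CQ_CMD_PDID_SHIFT),
	       (unsigned)(dword & CREATE_CQ_CQECNT_MASK));
	return 0;
}
```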
struct ocrdma_mqe cmd; + + struct ocrdma_get_dcbx_cfg_req *req = NULL; + struct ocrdma_get_dcbx_cfg_rsp *rsp = NULL; + struct pci_dev *pdev = dev->nic_info.pdev; + struct ocrdma_mqe_sge *mqe_sge = cmd.u.nonemb_req.sge; + + memset(&cmd, 0, sizeof(struct ocrdma_mqe)); + cmd.hdr.pyld_len = max_t (u32, sizeof(struct ocrdma_get_dcbx_cfg_rsp), + sizeof(struct ocrdma_get_dcbx_cfg_req)); + req = dma_alloc_coherent(&pdev->dev, cmd.hdr.pyld_len, &pa, GFP_KERNEL); + if (!req) { + status = -ENOMEM; + goto mem_err; + } + + cmd.hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe_sge->pa_lo = (u32) (pa & 0xFFFFFFFFUL); + mqe_sge->pa_hi = (u32) upper_32_bits(pa); + mqe_sge->len = cmd.hdr.pyld_len; + + memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req)); + ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG, + OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len); + req->param_type = ptype; + + status = ocrdma_mbx_cmd(dev, &cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_get_dcbx_cfg_rsp *)req; + ocrdma_le32_to_cpu(rsp, sizeof(struct ocrdma_get_dcbx_cfg_rsp)); + memcpy(dcbxcfg, &rsp->cfg, sizeof(struct ocrdma_dcbx_cfg)); + +mbx_err: + dma_free_coherent(&pdev->dev, cmd.hdr.pyld_len, req, pa); +mem_err: + return status; +} + +#define OCRDMA_MAX_SERVICE_LEVEL_INDEX 0x08 +#define OCRDMA_DEFAULT_SERVICE_LEVEL 0x05 + +static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype, + struct ocrdma_dcbx_cfg *dcbxcfg, + u8 *srvc_lvl) +{ + int status = -EINVAL, indx, slindx; + int ventry_cnt; + struct ocrdma_app_parameter *app_param; + u8 valid, proto_sel; + u8 app_prio, pfc_prio; + u16 proto; + + if (!(dcbxcfg->tcv_aev_opv_st & OCRDMA_DCBX_STATE_MASK)) { + pr_info("%s ocrdma%d DCBX is disabled\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + goto out; + } + + if (!ocrdma_is_enabled_and_synced(dcbxcfg->pfc_state)) { + pr_info("%s ocrdma%d priority flow control(%s) is %s%s\n", + dev_name(&dev->nic_info.pdev->dev), dev->id, + (ptype > 0 ? "operational" : "admin"), + (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_ENABLED) ? + "enabled" : "disabled", + (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_SYNC) ? 
+ "" : ", not sync'ed"); + goto out; + } else { + pr_info("%s ocrdma%d priority flow control is enabled and sync'ed\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + } + + ventry_cnt = (dcbxcfg->tcv_aev_opv_st >> + OCRDMA_DCBX_APP_ENTRY_SHIFT) + & OCRDMA_DCBX_STATE_MASK; + + for (indx = 0; indx < ventry_cnt; indx++) { + app_param = &dcbxcfg->app_param[indx]; + valid = (app_param->valid_proto_app >> + OCRDMA_APP_PARAM_VALID_SHIFT) + & OCRDMA_APP_PARAM_VALID_MASK; + proto_sel = (app_param->valid_proto_app + >> OCRDMA_APP_PARAM_PROTO_SEL_SHIFT) + & OCRDMA_APP_PARAM_PROTO_SEL_MASK; + proto = app_param->valid_proto_app & + OCRDMA_APP_PARAM_APP_PROTO_MASK; + + if ( + valid && proto == OCRDMA_APP_PROTO_ROCE && + proto_sel == OCRDMA_PROTO_SELECT_L2) { + for (slindx = 0; slindx < + OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) { + app_prio = ocrdma_get_app_prio( + (u8 *)app_param->app_prio, + slindx); + pfc_prio = ocrdma_get_pfc_prio( + (u8 *)dcbxcfg->pfc_prio, + slindx); + + if (app_prio && pfc_prio) { + *srvc_lvl = slindx; + status = 0; + goto out; + } + } + if (slindx == OCRDMA_MAX_SERVICE_LEVEL_INDEX) { + pr_info("%s ocrdma%d application priority not set for 0x%x protocol\n", + dev_name(&dev->nic_info.pdev->dev), + dev->id, proto); + } + } + } + +out: + return status; +} + +void ocrdma_init_service_level(struct ocrdma_dev *dev) +{ + int status = 0, indx; + struct ocrdma_dcbx_cfg dcbxcfg; + u8 srvc_lvl = OCRDMA_DEFAULT_SERVICE_LEVEL; + int ptype = OCRDMA_PARAMETER_TYPE_OPER; + + for (indx = 0; indx < 2; indx++) { + status = ocrdma_mbx_get_dcbx_config(dev, ptype, &dcbxcfg); + if (status) { + pr_err("%s(): status=%d\n", __func__, status); + ptype = OCRDMA_PARAMETER_TYPE_ADMIN; + continue; + } + + status = ocrdma_parse_dcbxcfg_rsp(dev, ptype, + &dcbxcfg, &srvc_lvl); + if (status) { + ptype = OCRDMA_PARAMETER_TYPE_ADMIN; + continue; + } + + break; + } + + if (status) + pr_info("%s ocrdma%d service level default\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + else + pr_info("%s ocrdma%d service level %d\n", + dev_name(&dev->nic_info.pdev->dev), dev->id, + srvc_lvl); + + dev->pfc_state = ocrdma_is_enabled_and_synced(dcbxcfg.pfc_state); + dev->sl = srvc_lvl; +} + int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) { int i; @@ -2709,13 +2895,15 @@ int ocrdma_init_hw(struct ocrdma_dev *dev) goto conf_err; status = ocrdma_mbx_get_phy_info(dev); if (status) - goto conf_err; + goto info_attrb_err; status = ocrdma_mbx_get_ctrl_attribs(dev); if (status) - goto conf_err; + goto info_attrb_err; return 0; +info_attrb_err: + ocrdma_mbx_delete_ah_tbl(dev); conf_err: ocrdma_destroy_mq(dev); mq_err: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index e513f7293142..6eed8f191322 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -135,4 +135,6 @@ int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq); int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset); char *port_speed_string(struct ocrdma_dev *dev); +void ocrdma_init_service_level(struct ocrdma_dev *); + #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7c504e079744..b0b2257b8e04 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -324,6 +324,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev) if (!dev->qp_tbl) goto alloc_err; } + + dev->stag_arr = kzalloc(sizeof(u64) * 
OCRDMA_MAX_STAG, GFP_KERNEL); + if (dev->stag_arr == NULL) + goto alloc_err; + spin_lock_init(&dev->av_tbl.lock); spin_lock_init(&dev->flush_q_lock); return 0; @@ -334,6 +339,7 @@ alloc_err: static void ocrdma_free_resources(struct ocrdma_dev *dev) { + kfree(dev->stag_arr); kfree(dev->qp_tbl); kfree(dev->cq_tbl); kfree(dev->sgid_tbl); @@ -353,15 +359,25 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, { struct ocrdma_dev *dev = dev_get_drvdata(device); - return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]); + return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]); +} + +static ssize_t show_hca_type(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); static struct device_attribute *ocrdma_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver + &dev_attr_fw_ver, + &dev_attr_hca_type }; static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) @@ -372,6 +388,68 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); } +static void ocrdma_add_default_sgid(struct ocrdma_dev *dev) +{ + /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */ + union ib_gid *sgid = &dev->sgid_tbl[0]; + + sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + ocrdma_get_guid(dev, &sgid->raw[8]); +} + +static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev, + struct net_device *net) +{ + struct in_device *in_dev; + union ib_gid gid; + in_dev = in_dev_get(net); + if (in_dev) { + for_ifa(in_dev) { + ipv6_addr_set_v4mapped(ifa->ifa_address, + (struct in6_addr *)&gid); + ocrdma_add_sgid(dev, &gid); + } + endfor_ifa(in_dev); + in_dev_put(in_dev); + } +} + +static void ocrdma_init_ipv6_gids(struct ocrdma_dev *dev, + struct net_device *net) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev; + union ib_gid *pgid; + struct inet6_ifaddr *ifp; + in6_dev = in6_dev_get(net); + if (in6_dev) { + read_lock_bh(&in6_dev->lock); + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { + pgid = (union ib_gid *)&ifp->addr; + ocrdma_add_sgid(dev, pgid); + } + read_unlock_bh(&in6_dev->lock); + in6_dev_put(in6_dev); + } +#endif +} + +static void ocrdma_init_gid_table(struct ocrdma_dev *dev) +{ + struct net_device *net_dev; + + for_each_netdev(&init_net, net_dev) { + struct net_device *real_dev = rdma_vlan_dev_real_dev(net_dev) ? + rdma_vlan_dev_real_dev(net_dev) : net_dev; + + if (real_dev == dev->nic_info.netdev) { + ocrdma_add_default_sgid(dev); + ocrdma_init_ipv4_gids(dev, net_dev); + ocrdma_init_ipv6_gids(dev, net_dev); + } + } +} + static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) { int status = 0, i; @@ -399,6 +477,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) if (status) goto alloc_err; + ocrdma_init_service_level(dev); + ocrdma_init_gid_table(dev); status = ocrdma_register_device(dev); if (status) goto alloc_err; @@ -508,6 +588,12 @@ static int ocrdma_close(struct ocrdma_dev *dev) return 0; } +static void ocrdma_shutdown(struct ocrdma_dev *dev) +{ + ocrdma_close(dev); + ocrdma_remove(dev); +} + /* event handling via NIC driver ensures that all the NIC specific * initialization done before RoCE driver notifies * event to stack. 
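The new `ocrdma_init_ipv4_gids()` above builds a RoCE GID from each IPv4 interface address with `ipv6_addr_set_v4mapped()`. A self-contained userspace sketch of the same mapping, assuming only that the GID is the v4-mapped `::ffff:a.b.c.d` form (which is what the kernel helper writes); `gid_from_ipv4` and the sample address are illustrative.

```c
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

/* What ipv6_addr_set_v4mapped() does in the kernel: the GID for an
 * IPv4 interface address is the v4-mapped form ::ffff:a.b.c.d. */
static void gid_from_ipv4(struct in6_addr *gid, struct in_addr v4)
{
	memset(gid, 0, sizeof(*gid));
	gid->s6_addr[10] = 0xff;
	gid->s6_addr[11] = 0xff;
	memcpy(&gid->s6_addr[12], &v4, 4);
}

int main(void)
{
	struct in_addr v4;
	struct in6_addr gid;
	char buf[INET6_ADDRSTRLEN];

	inet_pton(AF_INET, "192.0.2.10", &v4);  /* sample address */
	gid_from_ipv4(&gid, v4);
	printf("GID: %s\n", inet_ntop(AF_INET6, &gid, buf, sizeof(buf)));
	return 0;
}
```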
@@ -521,6 +607,9 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event) case BE_DEV_DOWN: ocrdma_close(dev); break; + case BE_DEV_SHUTDOWN: + ocrdma_shutdown(dev); + break; } } @@ -567,8 +656,10 @@ static int __init ocrdma_init_module(void) return 0; err_be_reg: +#if IS_ENABLED(CONFIG_IPV6) ocrdma_unregister_inet6addr_notifier(); err_notifier6: +#endif ocrdma_unregister_inetaddr_notifier(); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 96c9ee602ba4..4e036480c1a8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -28,8 +28,6 @@ #ifndef __OCRDMA_SLI_H__ #define __OCRDMA_SLI_H__ -#define Bit(_b) (1 << (_b)) - enum { OCRDMA_ASIC_GEN_SKH_R = 0x04, OCRDMA_ASIC_GEN_LANCER = 0x0B @@ -44,35 +42,39 @@ enum { #define OCRDMA_SUBSYS_ROCE 10 enum { OCRDMA_CMD_QUERY_CONFIG = 1, - OCRDMA_CMD_ALLOC_PD, - OCRDMA_CMD_DEALLOC_PD, - - OCRDMA_CMD_CREATE_AH_TBL, - OCRDMA_CMD_DELETE_AH_TBL, - - OCRDMA_CMD_CREATE_QP, - OCRDMA_CMD_QUERY_QP, - OCRDMA_CMD_MODIFY_QP, - OCRDMA_CMD_DELETE_QP, - - OCRDMA_CMD_RSVD1, - OCRDMA_CMD_ALLOC_LKEY, - OCRDMA_CMD_DEALLOC_LKEY, - OCRDMA_CMD_REGISTER_NSMR, - OCRDMA_CMD_REREGISTER_NSMR, - OCRDMA_CMD_REGISTER_NSMR_CONT, - OCRDMA_CMD_QUERY_NSMR, - OCRDMA_CMD_ALLOC_MW, - OCRDMA_CMD_QUERY_MW, - - OCRDMA_CMD_CREATE_SRQ, - OCRDMA_CMD_QUERY_SRQ, - OCRDMA_CMD_MODIFY_SRQ, - OCRDMA_CMD_DELETE_SRQ, - - OCRDMA_CMD_ATTACH_MCAST, - OCRDMA_CMD_DETACH_MCAST, - OCRDMA_CMD_GET_RDMA_STATS, + OCRDMA_CMD_ALLOC_PD = 2, + OCRDMA_CMD_DEALLOC_PD = 3, + + OCRDMA_CMD_CREATE_AH_TBL = 4, + OCRDMA_CMD_DELETE_AH_TBL = 5, + + OCRDMA_CMD_CREATE_QP = 6, + OCRDMA_CMD_QUERY_QP = 7, + OCRDMA_CMD_MODIFY_QP = 8 , + OCRDMA_CMD_DELETE_QP = 9, + + OCRDMA_CMD_RSVD1 = 10, + OCRDMA_CMD_ALLOC_LKEY = 11, + OCRDMA_CMD_DEALLOC_LKEY = 12, + OCRDMA_CMD_REGISTER_NSMR = 13, + OCRDMA_CMD_REREGISTER_NSMR = 14, + OCRDMA_CMD_REGISTER_NSMR_CONT = 15, + OCRDMA_CMD_QUERY_NSMR = 16, + OCRDMA_CMD_ALLOC_MW = 17, + OCRDMA_CMD_QUERY_MW = 18, + + OCRDMA_CMD_CREATE_SRQ = 19, + OCRDMA_CMD_QUERY_SRQ = 20, + OCRDMA_CMD_MODIFY_SRQ = 21, + OCRDMA_CMD_DELETE_SRQ = 22, + + OCRDMA_CMD_ATTACH_MCAST = 23, + OCRDMA_CMD_DETACH_MCAST = 24, + + OCRDMA_CMD_CREATE_RBQ = 25, + OCRDMA_CMD_DESTROY_RBQ = 26, + + OCRDMA_CMD_GET_RDMA_STATS = 27, OCRDMA_CMD_MAX }; @@ -99,11 +101,11 @@ enum { QTYPE_MCCQ = 3 }; -#define OCRDMA_MAX_SGID (8) +#define OCRDMA_MAX_SGID 8 #define OCRDMA_MAX_QP 2048 #define OCRDMA_MAX_CQ 2048 -#define OCRDMA_MAX_STAG 8192 +#define OCRDMA_MAX_STAG 16384 enum { OCRDMA_DB_RQ_OFFSET = 0xE0, @@ -124,33 +126,33 @@ enum { #define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */ /* qid #2 msbits at 12-11 */ #define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT 0x1 -#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */ +#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT 16 /* bits 16 - 28 */ /* Rearm bit */ -#define OCRDMA_DB_CQ_REARM_SHIFT (29) /* bit 29 */ +#define OCRDMA_DB_CQ_REARM_SHIFT 29 /* bit 29 */ /* solicited bit */ -#define OCRDMA_DB_CQ_SOLICIT_SHIFT (31) /* bit 31 */ +#define OCRDMA_DB_CQ_SOLICIT_SHIFT 31 /* bit 31 */ #define OCRDMA_EQ_ID_MASK 0x1FF /* bits 0 - 8 */ #define OCRDMA_EQ_ID_EXT_MASK 0x3e00 /* bits 9-13 */ -#define OCRDMA_EQ_ID_EXT_MASK_SHIFT (2) /* qid bits 9-13 at 11-15 */ +#define OCRDMA_EQ_ID_EXT_MASK_SHIFT 2 /* qid bits 9-13 at 11-15 */ /* Clear the interrupt for this eq */ -#define OCRDMA_EQ_CLR_SHIFT (9) /* bit 9 */ +#define OCRDMA_EQ_CLR_SHIFT 9 /* bit 9 */ /* Must be 1 */ 
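The `Bit(n)` → `BIT(n)` conversions running through the rest of ocrdma_sli.h replace the driver-local `#define Bit(_b) (1 << (_b))` removed above with the kernel's `BIT()` from `<linux/bitops.h>`. Besides dropping a redundant macro, `BIT()` expands to `1UL << (nr)`, so bit 31 no longer shifts into the sign bit of a plain `int` (technically undefined signed overflow). A standalone sketch with the macro reproduced locally; the `MCQE_*` names are shortened stand-ins for the header's.

```c
#include <stdio.h>

/* The kernel's BIT() from <linux/bitops.h>, reproduced so the example
 * builds standalone. Note 1UL: the old Bit() used a plain int. */
#define BIT(nr) (1UL << (nr))

/* Shortened stand-ins for the OCRDMA_MCQE_* masks converted above. */
#define MCQE_CMPL_MASK  BIT(28)
#define MCQE_AE_MASK    BIT(30)
#define MCQE_VALID_MASK BIT(31)

int main(void)
{
	unsigned long cqe_dw = MCQE_VALID_MASK | MCQE_CMPL_MASK;

	if (cqe_dw & MCQE_VALID_MASK)
		printf("valid completion, async=%d\n",
		       !!(cqe_dw & MCQE_AE_MASK));
	return 0;
}
```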
-#define OCRDMA_EQ_TYPE_SHIFT (10) /* bit 10 */ +#define OCRDMA_EQ_TYPE_SHIFT 10 /* bit 10 */ /* Number of event entries processed */ -#define OCRDMA_NUM_EQE_SHIFT (16) /* bits 16 - 28 */ +#define OCRDMA_NUM_EQE_SHIFT 16 /* bits 16 - 28 */ /* Rearm bit */ -#define OCRDMA_REARM_SHIFT (29) /* bit 29 */ +#define OCRDMA_REARM_SHIFT 29 /* bit 29 */ #define OCRDMA_MQ_ID_MASK 0x7FF /* bits 0 - 10 */ /* Number of entries posted */ -#define OCRDMA_MQ_NUM_MQE_SHIFT (16) /* bits 16 - 29 */ +#define OCRDMA_MQ_NUM_MQE_SHIFT 16 /* bits 16 - 29 */ -#define OCRDMA_MIN_HPAGE_SIZE (4096) +#define OCRDMA_MIN_HPAGE_SIZE 4096 -#define OCRDMA_MIN_Q_PAGE_SIZE (4096) -#define OCRDMA_MAX_Q_PAGES (8) +#define OCRDMA_MIN_Q_PAGE_SIZE 4096 +#define OCRDMA_MAX_Q_PAGES 8 #define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C #define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF @@ -166,14 +168,14 @@ enum { # 6: 256K Bytes # 7: 512K Bytes */ -#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8) +#define OCRDMA_MAX_Q_PAGE_SIZE_CNT 8 #define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES) -#define MAX_OCRDMA_QP_PAGES (8) +#define MAX_OCRDMA_QP_PAGES 8 #define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HQ_PAGE_SIZE) -#define OCRDMA_CREATE_CQ_MAX_PAGES (4) -#define OCRDMA_DPP_CQE_SIZE (4) +#define OCRDMA_CREATE_CQ_MAX_PAGES 4 +#define OCRDMA_DPP_CQE_SIZE 4 #define OCRDMA_GEN2_MAX_CQE 1024 #define OCRDMA_GEN2_CQ_PAGE_SIZE 4096 @@ -234,7 +236,7 @@ struct ocrdma_mqe_sge { enum { OCRDMA_MQE_HDR_EMB_SHIFT = 0, - OCRDMA_MQE_HDR_EMB_MASK = Bit(0), + OCRDMA_MQE_HDR_EMB_MASK = BIT(0), OCRDMA_MQE_HDR_SGE_CNT_SHIFT = 3, OCRDMA_MQE_HDR_SGE_CNT_MASK = 0x1F << OCRDMA_MQE_HDR_SGE_CNT_SHIFT, OCRDMA_MQE_HDR_SPECIAL_SHIFT = 24, @@ -288,7 +290,7 @@ struct ocrdma_pa { u32 hi; }; -#define MAX_OCRDMA_EQ_PAGES (8) +#define MAX_OCRDMA_EQ_PAGES 8 struct ocrdma_create_eq_req { struct ocrdma_mbx_hdr req; u32 num_pages; @@ -300,7 +302,7 @@ struct ocrdma_create_eq_req { }; enum { - OCRDMA_CREATE_EQ_VALID = Bit(29), + OCRDMA_CREATE_EQ_VALID = BIT(29), OCRDMA_CREATE_EQ_CNT_SHIFT = 26, OCRDMA_CREATE_CQ_DELAY_SHIFT = 13, }; @@ -310,7 +312,7 @@ struct ocrdma_create_eq_rsp { u32 vector_eqid; }; -#define OCRDMA_EQ_MINOR_OTHER (0x1) +#define OCRDMA_EQ_MINOR_OTHER 0x1 enum { OCRDMA_MCQE_STATUS_SHIFT = 0, @@ -318,13 +320,13 @@ enum { OCRDMA_MCQE_ESTATUS_SHIFT = 16, OCRDMA_MCQE_ESTATUS_MASK = 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT, OCRDMA_MCQE_CONS_SHIFT = 27, - OCRDMA_MCQE_CONS_MASK = Bit(27), + OCRDMA_MCQE_CONS_MASK = BIT(27), OCRDMA_MCQE_CMPL_SHIFT = 28, - OCRDMA_MCQE_CMPL_MASK = Bit(28), + OCRDMA_MCQE_CMPL_MASK = BIT(28), OCRDMA_MCQE_AE_SHIFT = 30, - OCRDMA_MCQE_AE_MASK = Bit(30), + OCRDMA_MCQE_AE_MASK = BIT(30), OCRDMA_MCQE_VALID_SHIFT = 31, - OCRDMA_MCQE_VALID_MASK = Bit(31) + OCRDMA_MCQE_VALID_MASK = BIT(31) }; struct ocrdma_mcqe { @@ -335,13 +337,13 @@ struct ocrdma_mcqe { }; enum { - OCRDMA_AE_MCQE_QPVALID = Bit(31), + OCRDMA_AE_MCQE_QPVALID = BIT(31), OCRDMA_AE_MCQE_QPID_MASK = 0xFFFF, - OCRDMA_AE_MCQE_CQVALID = Bit(31), + OCRDMA_AE_MCQE_CQVALID = BIT(31), OCRDMA_AE_MCQE_CQID_MASK = 0xFFFF, - OCRDMA_AE_MCQE_VALID = Bit(31), - OCRDMA_AE_MCQE_AE = Bit(30), + OCRDMA_AE_MCQE_VALID = BIT(31), + OCRDMA_AE_MCQE_AE = BIT(30), OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT = 16, OCRDMA_AE_MCQE_EVENT_TYPE_MASK = 0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT, @@ -382,9 +384,9 @@ enum { OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK = 0xFF << OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT, OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT = 30, - OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = Bit(30), + OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = 
BIT(30), OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT = 31, - OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = Bit(31) + OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = BIT(31) }; struct ocrdma_ae_mpa_mcqe { @@ -408,9 +410,9 @@ enum { OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK = 0xFF << OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT, OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT = 30, - OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = Bit(30), + OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = BIT(30), OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT = 31, - OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = Bit(31) + OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = BIT(31) }; struct ocrdma_ae_qp_mcqe { @@ -422,7 +424,12 @@ struct ocrdma_ae_qp_mcqe { #define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14 #define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5 -#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3 + +enum ocrdma_async_grp5_events { + OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01, + OCRDMA_ASYNC_EVENT_COS_VALUE = 0x02, + OCRDMA_ASYNC_EVENT_PVID_STATE = 0x03 +}; enum OCRDMA_ASYNC_EVENT_TYPE { OCRDMA_CQ_ERROR = 0x00, @@ -440,9 +447,9 @@ enum OCRDMA_ASYNC_EVENT_TYPE { /* mailbox command request and responses */ enum { OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2, - OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = Bit(2), + OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = BIT(2), OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT = 3, - OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = Bit(3), + OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = BIT(3), OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT = 8, OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK = 0xFFFFFF << OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT, @@ -525,8 +532,8 @@ struct ocrdma_mbx_query_config { u32 max_ird_ord_per_qp; u32 max_shared_ird_ord; u32 max_mr; - u32 max_mr_size_lo; u32 max_mr_size_hi; + u32 max_mr_size_lo; u32 max_num_mr_pbl; u32 max_mw; u32 max_fmr; @@ -580,17 +587,26 @@ enum { OCRDMA_FN_MODE_RDMA = 0x4 }; +enum { + OCRDMA_IF_TYPE_MASK = 0xFFFF0000, + OCRDMA_IF_TYPE_SHIFT = 0x10, + OCRDMA_PHY_TYPE_MASK = 0x0000FFFF, + OCRDMA_FUTURE_DETAILS_MASK = 0xFFFF0000, + OCRDMA_FUTURE_DETAILS_SHIFT = 0x10, + OCRDMA_EX_PHY_DETAILS_MASK = 0x0000FFFF, + OCRDMA_FSPEED_SUPP_MASK = 0xFFFF0000, + OCRDMA_FSPEED_SUPP_SHIFT = 0x10, + OCRDMA_ASPEED_SUPP_MASK = 0x0000FFFF +}; + struct ocrdma_get_phy_info_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; - u16 phy_type; - u16 interface_type; + u32 ityp_ptyp; u32 misc_params; - u16 ext_phy_details; - u16 rsvd; - u16 auto_speeds_supported; - u16 fixed_speeds_supported; + u32 ftrdtl_exphydtl; + u32 fspeed_aspeed; u32 future_use[2]; }; @@ -603,19 +619,34 @@ enum { OCRDMA_PHY_SPEED_40GBPS = 0x20 }; +enum { + OCRDMA_PORT_NUM_MASK = 0x3F, + OCRDMA_PT_MASK = 0xC0, + OCRDMA_PT_SHIFT = 0x6, + OCRDMA_LINK_DUP_MASK = 0x0000FF00, + OCRDMA_LINK_DUP_SHIFT = 0x8, + OCRDMA_PHY_PS_MASK = 0x00FF0000, + OCRDMA_PHY_PS_SHIFT = 0x10, + OCRDMA_PHY_PFLT_MASK = 0xFF000000, + OCRDMA_PHY_PFLT_SHIFT = 0x18, + OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000, + OCRDMA_QOS_LNKSP_SHIFT = 0x10, + OCRDMA_LLST_MASK = 0xFF, + OCRDMA_PLFC_MASK = 0x00000400, + OCRDMA_PLFC_SHIFT = 0x8, + OCRDMA_PLRFC_MASK = 0x00000200, + OCRDMA_PLRFC_SHIFT = 0x8, + OCRDMA_PLTFC_MASK = 0x00000100, + OCRDMA_PLTFC_SHIFT = 0x8 +}; struct ocrdma_get_link_speed_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; - u8 pt_port_num; - u8 link_duplex; - u8 phys_port_speed; - u8 phys_port_fault; - u16 rsvd1; - u16 qos_lnk_speed; - u8 logical_lnk_status; - u8 rsvd2[3]; + u32 pflt_pps_ld_pnum; + u32 qos_lsp; + u32 res_lls; }; enum { @@ -639,9 +670,9 @@ enum { OCRDMA_CREATE_CQ_PAGE_SIZE_MASK = 0xFF, OCRDMA_CREATE_CQ_COALESCWM_SHIFT = 12, - OCRDMA_CREATE_CQ_COALESCWM_MASK = Bit(13) | Bit(12), - 
OCRDMA_CREATE_CQ_FLAGS_NODELAY = Bit(14), - OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = Bit(15), + OCRDMA_CREATE_CQ_COALESCWM_MASK = BIT(13) | BIT(12), + OCRDMA_CREATE_CQ_FLAGS_NODELAY = BIT(14), + OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = BIT(15), OCRDMA_CREATE_CQ_EQ_ID_MASK = 0xFFFF, OCRDMA_CREATE_CQ_CQE_COUNT_MASK = 0xFFFF @@ -654,8 +685,8 @@ enum { OCRDMA_CREATE_CQ_EQID_SHIFT = 22, OCRDMA_CREATE_CQ_CNT_SHIFT = 27, - OCRDMA_CREATE_CQ_FLAGS_VALID = Bit(29), - OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = Bit(31), + OCRDMA_CREATE_CQ_FLAGS_VALID = BIT(29), + OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = BIT(31), OCRDMA_CREATE_CQ_DEF_FLAGS = OCRDMA_CREATE_CQ_FLAGS_VALID | OCRDMA_CREATE_CQ_FLAGS_EVENTABLE | OCRDMA_CREATE_CQ_FLAGS_NODELAY @@ -666,8 +697,7 @@ struct ocrdma_create_cq_cmd { u32 pgsz_pgcnt; u32 ev_cnt_flags; u32 eqn; - u16 cqe_count; - u16 pd_id; + u32 pdid_cqecnt; u32 rsvd6; struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES]; }; @@ -678,6 +708,10 @@ struct ocrdma_create_cq { }; enum { + OCRDMA_CREATE_CQ_CMD_PDID_SHIFT = 0x10 +}; + +enum { OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF }; @@ -695,8 +729,8 @@ enum { OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22, OCRDMA_CREATE_MQ_CQ_ID_SHIFT = 16, OCRDMA_CREATE_MQ_RING_SIZE_SHIFT = 16, - OCRDMA_CREATE_MQ_VALID = Bit(31), - OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0) + OCRDMA_CREATE_MQ_VALID = BIT(31), + OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = BIT(0) }; struct ocrdma_create_mq_req { @@ -747,7 +781,7 @@ enum { OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT = 16, OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT = 19, OCRDMA_CREATE_QP_REQ_QPT_SHIFT = 29, - OCRDMA_CREATE_QP_REQ_QPT_MASK = Bit(31) | Bit(30) | Bit(29), + OCRDMA_CREATE_QP_REQ_QPT_MASK = BIT(31) | BIT(30) | BIT(29), OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT = 0, OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK = 0xFFFF, @@ -762,23 +796,23 @@ enum { OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT, OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT = 0, - OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = Bit(0), + OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = BIT(0), OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT = 1, - OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = Bit(1), + OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = BIT(1), OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT = 2, - OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = Bit(2), + OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = BIT(2), OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT = 3, - OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = Bit(3), + OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = BIT(3), OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT = 4, - OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = Bit(4), + OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = BIT(4), OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT = 5, - OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = Bit(5), + OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = BIT(5), OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT = 6, - OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = Bit(6), + OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = BIT(6), OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT = 7, - OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = Bit(7), + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = BIT(7), OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT = 8, - OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = Bit(8), + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = BIT(8), OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT = 16, OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK = 0xFFFF << OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT, @@ -891,7 +925,7 @@ enum { OCRDMA_CREATE_QP_RSP_SQ_ID_MASK = 0xFFFF << OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT, - OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = Bit(0), + OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = BIT(0), OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT = 1, OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK = 0x7FFF << 
OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT, @@ -928,38 +962,38 @@ enum { OCRDMA_MODIFY_QP_ID_SHIFT = 0, OCRDMA_MODIFY_QP_ID_MASK = 0xFFFF, - OCRDMA_QP_PARA_QPS_VALID = Bit(0), - OCRDMA_QP_PARA_SQD_ASYNC_VALID = Bit(1), - OCRDMA_QP_PARA_PKEY_VALID = Bit(2), - OCRDMA_QP_PARA_QKEY_VALID = Bit(3), - OCRDMA_QP_PARA_PMTU_VALID = Bit(4), - OCRDMA_QP_PARA_ACK_TO_VALID = Bit(5), - OCRDMA_QP_PARA_RETRY_CNT_VALID = Bit(6), - OCRDMA_QP_PARA_RRC_VALID = Bit(7), - OCRDMA_QP_PARA_RQPSN_VALID = Bit(8), - OCRDMA_QP_PARA_MAX_IRD_VALID = Bit(9), - OCRDMA_QP_PARA_MAX_ORD_VALID = Bit(10), - OCRDMA_QP_PARA_RNT_VALID = Bit(11), - OCRDMA_QP_PARA_SQPSN_VALID = Bit(12), - OCRDMA_QP_PARA_DST_QPN_VALID = Bit(13), - OCRDMA_QP_PARA_MAX_WQE_VALID = Bit(14), - OCRDMA_QP_PARA_MAX_RQE_VALID = Bit(15), - OCRDMA_QP_PARA_SGE_SEND_VALID = Bit(16), - OCRDMA_QP_PARA_SGE_RECV_VALID = Bit(17), - OCRDMA_QP_PARA_SGE_WR_VALID = Bit(18), - OCRDMA_QP_PARA_INB_RDEN_VALID = Bit(19), - OCRDMA_QP_PARA_INB_WREN_VALID = Bit(20), - OCRDMA_QP_PARA_FLOW_LBL_VALID = Bit(21), - OCRDMA_QP_PARA_BIND_EN_VALID = Bit(22), - OCRDMA_QP_PARA_ZLKEY_EN_VALID = Bit(23), - OCRDMA_QP_PARA_FMR_EN_VALID = Bit(24), - OCRDMA_QP_PARA_INBAT_EN_VALID = Bit(25), - OCRDMA_QP_PARA_VLAN_EN_VALID = Bit(26), - - OCRDMA_MODIFY_QP_FLAGS_RD = Bit(0), - OCRDMA_MODIFY_QP_FLAGS_WR = Bit(1), - OCRDMA_MODIFY_QP_FLAGS_SEND = Bit(2), - OCRDMA_MODIFY_QP_FLAGS_ATOMIC = Bit(3) + OCRDMA_QP_PARA_QPS_VALID = BIT(0), + OCRDMA_QP_PARA_SQD_ASYNC_VALID = BIT(1), + OCRDMA_QP_PARA_PKEY_VALID = BIT(2), + OCRDMA_QP_PARA_QKEY_VALID = BIT(3), + OCRDMA_QP_PARA_PMTU_VALID = BIT(4), + OCRDMA_QP_PARA_ACK_TO_VALID = BIT(5), + OCRDMA_QP_PARA_RETRY_CNT_VALID = BIT(6), + OCRDMA_QP_PARA_RRC_VALID = BIT(7), + OCRDMA_QP_PARA_RQPSN_VALID = BIT(8), + OCRDMA_QP_PARA_MAX_IRD_VALID = BIT(9), + OCRDMA_QP_PARA_MAX_ORD_VALID = BIT(10), + OCRDMA_QP_PARA_RNT_VALID = BIT(11), + OCRDMA_QP_PARA_SQPSN_VALID = BIT(12), + OCRDMA_QP_PARA_DST_QPN_VALID = BIT(13), + OCRDMA_QP_PARA_MAX_WQE_VALID = BIT(14), + OCRDMA_QP_PARA_MAX_RQE_VALID = BIT(15), + OCRDMA_QP_PARA_SGE_SEND_VALID = BIT(16), + OCRDMA_QP_PARA_SGE_RECV_VALID = BIT(17), + OCRDMA_QP_PARA_SGE_WR_VALID = BIT(18), + OCRDMA_QP_PARA_INB_RDEN_VALID = BIT(19), + OCRDMA_QP_PARA_INB_WREN_VALID = BIT(20), + OCRDMA_QP_PARA_FLOW_LBL_VALID = BIT(21), + OCRDMA_QP_PARA_BIND_EN_VALID = BIT(22), + OCRDMA_QP_PARA_ZLKEY_EN_VALID = BIT(23), + OCRDMA_QP_PARA_FMR_EN_VALID = BIT(24), + OCRDMA_QP_PARA_INBAT_EN_VALID = BIT(25), + OCRDMA_QP_PARA_VLAN_EN_VALID = BIT(26), + + OCRDMA_MODIFY_QP_FLAGS_RD = BIT(0), + OCRDMA_MODIFY_QP_FLAGS_WR = BIT(1), + OCRDMA_MODIFY_QP_FLAGS_SEND = BIT(2), + OCRDMA_MODIFY_QP_FLAGS_ATOMIC = BIT(3) }; enum { @@ -978,15 +1012,15 @@ enum { OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK = 0xFFFF << OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT, - OCRDMA_QP_PARAMS_FLAGS_FMR_EN = Bit(0), - OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = Bit(1), - OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = Bit(2), - OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = Bit(3), - OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = Bit(4), + OCRDMA_QP_PARAMS_FLAGS_FMR_EN = BIT(0), + OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = BIT(1), + OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = BIT(2), + OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = BIT(3), + OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = BIT(4), OCRDMA_QP_PARAMS_STATE_SHIFT = 5, - OCRDMA_QP_PARAMS_STATE_MASK = Bit(5) | Bit(6) | Bit(7), - OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = Bit(8), - OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = Bit(9), + OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7), + OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8), + OCRDMA_QP_PARAMS_FLAGS_INB_ATEN 
= BIT(9), OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16, OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF << OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT, @@ -1231,7 +1265,6 @@ struct ocrdma_destroy_srq { enum { OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16), - OCRDMA_PD_MAX_DPP_ENABLED_QP = 8, OCRDMA_DPP_PAGE_SIZE = 4096 }; @@ -1242,7 +1275,7 @@ struct ocrdma_alloc_pd { }; enum { - OCRDMA_ALLOC_PD_RSP_DPP = Bit(16), + OCRDMA_ALLOC_PD_RSP_DPP = BIT(16), OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT = 20, OCRDMA_ALLOC_PD_RSP_PDID_MASK = 0xFFFF, }; @@ -1274,18 +1307,18 @@ enum { OCRDMA_ALLOC_LKEY_PD_ID_MASK = 0xFFFF, OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT = 0, - OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = Bit(0), + OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = BIT(0), OCRDMA_ALLOC_LKEY_FMR_SHIFT = 1, - OCRDMA_ALLOC_LKEY_FMR_MASK = Bit(1), + OCRDMA_ALLOC_LKEY_FMR_MASK = BIT(1), OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT = 2, - OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = Bit(2), + OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = BIT(2), OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT = 3, - OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = Bit(3), + OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = BIT(3), OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT = 4, - OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = Bit(4), + OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = BIT(4), OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT = 5, - OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = Bit(5), - OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = Bit(6), + OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = BIT(5), + OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = BIT(6), OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT = 6, OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT = 16, OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK = 0xFFFF << @@ -1344,21 +1377,21 @@ enum { OCRDMA_REG_NSMR_HPAGE_SIZE_MASK = 0xFF << OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT, OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT = 24, - OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = Bit(24), + OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = BIT(24), OCRDMA_REG_NSMR_ZB_SHIFT = 25, - OCRDMA_REG_NSMR_ZB_SHIFT_MASK = Bit(25), + OCRDMA_REG_NSMR_ZB_SHIFT_MASK = BIT(25), OCRDMA_REG_NSMR_REMOTE_INV_SHIFT = 26, - OCRDMA_REG_NSMR_REMOTE_INV_MASK = Bit(26), + OCRDMA_REG_NSMR_REMOTE_INV_MASK = BIT(26), OCRDMA_REG_NSMR_REMOTE_WR_SHIFT = 27, - OCRDMA_REG_NSMR_REMOTE_WR_MASK = Bit(27), + OCRDMA_REG_NSMR_REMOTE_WR_MASK = BIT(27), OCRDMA_REG_NSMR_REMOTE_RD_SHIFT = 28, - OCRDMA_REG_NSMR_REMOTE_RD_MASK = Bit(28), + OCRDMA_REG_NSMR_REMOTE_RD_MASK = BIT(28), OCRDMA_REG_NSMR_LOCAL_WR_SHIFT = 29, - OCRDMA_REG_NSMR_LOCAL_WR_MASK = Bit(29), + OCRDMA_REG_NSMR_LOCAL_WR_MASK = BIT(29), OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT = 30, - OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = Bit(30), + OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = BIT(30), OCRDMA_REG_NSMR_LAST_SHIFT = 31, - OCRDMA_REG_NSMR_LAST_MASK = Bit(31) + OCRDMA_REG_NSMR_LAST_MASK = BIT(31) }; struct ocrdma_reg_nsmr { @@ -1385,7 +1418,7 @@ enum { OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT, OCRDMA_REG_NSMR_CONT_LAST_SHIFT = 31, - OCRDMA_REG_NSMR_CONT_LAST_MASK = Bit(31) + OCRDMA_REG_NSMR_CONT_LAST_MASK = BIT(31) }; struct ocrdma_reg_nsmr_cont { @@ -1531,7 +1564,7 @@ struct ocrdma_delete_ah_tbl_rsp { enum { OCRDMA_EQE_VALID_SHIFT = 0, - OCRDMA_EQE_VALID_MASK = Bit(0), + OCRDMA_EQE_VALID_MASK = BIT(0), OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE, OCRDMA_EQE_RESOURCE_ID_SHIFT = 16, OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF << @@ -1589,11 +1622,11 @@ enum { OCRDMA_CQE_UD_STATUS_MASK = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT, OCRDMA_CQE_STATUS_SHIFT = 16, OCRDMA_CQE_STATUS_MASK = 0xFF << OCRDMA_CQE_STATUS_SHIFT, - OCRDMA_CQE_VALID = Bit(31), - OCRDMA_CQE_INVALIDATE = Bit(30), - OCRDMA_CQE_QTYPE = Bit(29), - OCRDMA_CQE_IMM = Bit(28), - OCRDMA_CQE_WRITE_IMM = Bit(27), + OCRDMA_CQE_VALID = 
BIT(31), + OCRDMA_CQE_INVALIDATE = BIT(30), + OCRDMA_CQE_QTYPE = BIT(29), + OCRDMA_CQE_IMM = BIT(28), + OCRDMA_CQE_WRITE_IMM = BIT(27), OCRDMA_CQE_QTYPE_SQ = 0, OCRDMA_CQE_QTYPE_RQ = 1, OCRDMA_CQE_SRCQP_MASK = 0xFFFFFF @@ -1737,8 +1770,8 @@ struct ocrdma_grh { u16 rsvd; } __packed; -#define OCRDMA_AV_VALID Bit(7) -#define OCRDMA_AV_VLAN_VALID Bit(1) +#define OCRDMA_AV_VALID BIT(7) +#define OCRDMA_AV_VLAN_VALID BIT(1) struct ocrdma_av { struct ocrdma_eth_vlan eth_hdr; @@ -1896,12 +1929,62 @@ struct ocrdma_rdma_stats_resp { struct ocrdma_rx_dbg_stats rx_dbg_stats; } __packed; +enum { + OCRDMA_HBA_ATTRB_EPROM_VER_LO_MASK = 0xFF, + OCRDMA_HBA_ATTRB_EPROM_VER_HI_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_EPROM_VER_HI_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_CDBLEN_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_ASIC_REV_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_ASIC_REV_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_GUID0_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_GUID0_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_GUID13_MASK = 0xFF, + OCRDMA_HBA_ATTRB_GUID14_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_GUID14_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_GUID15_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_GUID15_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PCNT_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_PCNT_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_LDTOUT_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_ISCSI_VER_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_ISCSI_VER_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_MFUNC_DEV_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_MFUNC_DEV_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_CV_MASK = 0xFF, + OCRDMA_HBA_ATTRB_HBA_ST_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_HBA_ST_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_MAX_DOMS_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_MAX_DOMS_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PTNUM_MASK = 0x3F000000, + OCRDMA_HBA_ATTRB_PTNUM_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_PT_MASK = 0xC0000000, + OCRDMA_HBA_ATTRB_PT_SHIFT = 0x1E, + OCRDMA_HBA_ATTRB_ISCSI_FET_MASK = 0xFF, + OCRDMA_HBA_ATTRB_ASIC_GEN_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_ASIC_GEN_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_PCI_VID_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_PCI_DID_MASK = 0xFFFF0000, + OCRDMA_HBA_ATTRB_PCI_DID_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PCI_SVID_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_PCI_SSID_MASK = 0xFFFF0000, + OCRDMA_HBA_ATTRB_PCI_SSID_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PCI_BUSNUM_MASK = 0xFF, + OCRDMA_HBA_ATTRB_PCI_DEVNUM_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_PCI_DEVNUM_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_PCI_FUNCNUM_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_PCI_FUNCNUM_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_IF_TYPE_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_IF_TYPE_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_NETFIL_MASK =0xFF +}; struct mgmt_hba_attribs { u8 flashrom_version_string[32]; u8 manufacturer_name[32]; u32 supported_modes; - u32 rsvd0[3]; + u32 rsvd_eprom_verhi_verlo; + u32 mbx_ds_ver; + u32 epfw_ds_ver; u8 ncsi_ver_string[12]; u32 default_extended_timeout; u8 controller_model_number[32]; @@ -1914,34 +1997,26 @@ struct mgmt_hba_attribs { u8 driver_version_string[32]; u8 fw_on_flash_version_string[32]; u32 functionalities_supported; - u16 max_cdblength; - u8 asic_revision; - u8 generational_guid[16]; - u8 hba_port_count; - u16 default_link_down_timeout; - u8 iscsi_ver_min_max; - u8 multifunction_device; - u8 cache_valid; - u8 hba_status; - u8 max_domains_supported; - u8 phy_port; + u32 guid0_asicrev_cdblen; + u8 generational_guid[12]; + u32 portcnt_guid15; + u32 mfuncdev_iscsi_ldtout; + u32 ptpnum_maxdoms_hbast_cv; u32 firmware_post_status; u32 hba_mtu[8]; - u32 rsvd1[4]; + u32 res_asicgen_iscsi_feaures; + u32 rsvd1[3]; }; struct mgmt_controller_attrib { struct mgmt_hba_attribs hba_attribs; - u16 
pci_vendor_id; - u16 pci_device_id; - u16 pci_sub_vendor_id; - u16 pci_sub_system_id; - u8 pci_bus_number; - u8 pci_device_number; - u8 pci_function_number; - u8 interface_type; - u64 unique_identifier; - u32 rsvd0[5]; + u32 pci_did_vid; + u32 pci_ssid_svid; + u32 ityp_fnum_devnum_bnum; + u32 uid_hi; + u32 uid_lo; + u32 res_nnetfil; + u32 rsvd0[4]; }; struct ocrdma_get_ctrl_attribs_rsp { @@ -1949,5 +2024,79 @@ struct ocrdma_get_ctrl_attribs_rsp { struct mgmt_controller_attrib ctrl_attribs; }; +#define OCRDMA_SUBSYS_DCBX 0x10 + +enum OCRDMA_DCBX_OPCODE { + OCRDMA_CMD_GET_DCBX_CONFIG = 0x01 +}; + +enum OCRDMA_DCBX_PARAM_TYPE { + OCRDMA_PARAMETER_TYPE_ADMIN = 0x00, + OCRDMA_PARAMETER_TYPE_OPER = 0x01, + OCRDMA_PARAMETER_TYPE_PEER = 0x02 +}; + +enum OCRDMA_DCBX_APP_PROTO { + OCRDMA_APP_PROTO_ROCE = 0x8915 +}; + +enum OCRDMA_DCBX_PROTO { + OCRDMA_PROTO_SELECT_L2 = 0x00, + OCRDMA_PROTO_SELECT_L4 = 0x01 +}; + +enum OCRDMA_DCBX_APP_PARAM { + OCRDMA_APP_PARAM_APP_PROTO_MASK = 0xFFFF, + OCRDMA_APP_PARAM_PROTO_SEL_MASK = 0xFF, + OCRDMA_APP_PARAM_PROTO_SEL_SHIFT = 0x10, + OCRDMA_APP_PARAM_VALID_MASK = 0xFF, + OCRDMA_APP_PARAM_VALID_SHIFT = 0x18 +}; + +enum OCRDMA_DCBX_STATE_FLAGS { + OCRDMA_STATE_FLAG_ENABLED = 0x01, + OCRDMA_STATE_FLAG_ADDVERTISED = 0x02, + OCRDMA_STATE_FLAG_WILLING = 0x04, + OCRDMA_STATE_FLAG_SYNC = 0x08, + OCRDMA_STATE_FLAG_UNSUPPORTED = 0x40000000, + OCRDMA_STATE_FLAG_NEG_FAILD = 0x80000000 +}; + +enum OCRDMA_TCV_AEV_OPV_ST { + OCRDMA_DCBX_TC_SUPPORT_MASK = 0xFF, + OCRDMA_DCBX_TC_SUPPORT_SHIFT = 0x18, + OCRDMA_DCBX_APP_ENTRY_SHIFT = 0x10, + OCRDMA_DCBX_OP_PARAM_SHIFT = 0x08, + OCRDMA_DCBX_STATE_MASK = 0xFF +}; + +struct ocrdma_app_parameter { + u32 valid_proto_app; + u32 oui; + u32 app_prio[2]; +}; + +struct ocrdma_dcbx_cfg { + u32 tcv_aev_opv_st; + u32 tc_state; + u32 pfc_state; + u32 qcn_state; + u32 appl_state; + u32 ll_state; + u32 tc_bw[2]; + u32 tc_prio[8]; + u32 pfc_prio[2]; + struct ocrdma_app_parameter app_param[15]; +}; + +struct ocrdma_get_dcbx_cfg_req { + struct ocrdma_mbx_hdr hdr; + u32 param_type; +} __packed; + +struct ocrdma_get_dcbx_cfg_rsp { + struct ocrdma_mbx_rsp hdr; + struct ocrdma_dcbx_cfg cfg; +} __packed; #endif /* __OCRDMA_SLI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 8bae8718fa53..4c68305ee781 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -69,11 +69,11 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid); - attr->max_mr_size = ~0ull; + attr->max_mr_size = dev->attr.max_mr_size; attr->page_size_cap = 0xffff000; attr->vendor_id = dev->nic_info.pdev->vendor; attr->vendor_part_id = dev->nic_info.pdev->device; - attr->hw_ver = 0; + attr->hw_ver = dev->asic_id; attr->max_qp = dev->attr.max_qp; attr->max_ah = OCRDMA_MAX_AH; attr->max_qp_wr = dev->attr.max_wqe; @@ -101,7 +101,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) attr->max_srq_sge = dev->attr.max_srq_sge; attr->max_srq_wr = dev->attr.max_rqe; attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay; - attr->max_fast_reg_page_list_len = 0; + attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr; attr->max_pkeys = 1; return 0; } @@ -268,7 +268,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, pd->dpp_enabled = 
ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; pd->num_dpp_qp = - pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; + pd->dpp_enabled ? (dev->nic_info.db_page_size / + dev->attr.wqe_size) : 0; } retry: @@ -328,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) struct ocrdma_pd *pd = uctx->cntxt_pd; struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); - BUG_ON(uctx->pd_in_use); + if (uctx->pd_in_use) { + pr_err("%s(%d) Freeing in use pdid=0x%x.\n", + __func__, dev->id, pd->id); + } uctx->cntxt_pd = NULL; status = _ocrdma_dealloc_pd(dev, pd); return status; @@ -384,7 +388,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, memset(&resp, 0, sizeof(resp)); resp.ah_tbl_len = ctx->ah_tbl.len; - resp.ah_tbl_page = ctx->ah_tbl.pa; + resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va); status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len); if (status) @@ -843,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) if (mr->umem) ib_umem_release(mr->umem); kfree(mr); + + /* Don't stop cleanup, in case FW is unresponsive */ + if (dev->mqe_ctx.fw_error_state) { + status = 0; + pr_err("%s(%d) fw not responding.\n", + __func__, dev->id); + } return status; } @@ -859,7 +870,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, uresp.page_size = PAGE_ALIGN(cq->len); uresp.num_pages = 1; uresp.max_hw_cqe = cq->max_hw_cqe; - uresp.page_addr[0] = cq->pa; + uresp.page_addr[0] = virt_to_phys(cq->va); uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id); uresp.db_page_size = dev->nic_info.db_page_size; uresp.phase_change = cq->phase_change ? 1 : 0; @@ -1112,13 +1123,13 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, uresp.sq_dbid = qp->sq.dbid; uresp.num_sq_pages = 1; uresp.sq_page_size = PAGE_ALIGN(qp->sq.len); - uresp.sq_page_addr[0] = qp->sq.pa; + uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va); uresp.num_wqe_allocated = qp->sq.max_cnt; if (!srq) { uresp.rq_dbid = qp->rq.dbid; uresp.num_rq_pages = 1; uresp.rq_page_size = PAGE_ALIGN(qp->rq.len); - uresp.rq_page_addr[0] = qp->rq.pa; + uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va); uresp.num_rqe_allocated = qp->rq.max_cnt; } uresp.db_page_addr = usr_db; @@ -1669,7 +1680,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, memset(&uresp, 0, sizeof(uresp)); uresp.rq_dbid = srq->rq.dbid; uresp.num_rq_pages = 1; - uresp.rq_page_addr[0] = srq->rq.pa; + uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va); uresp.rq_page_size = srq->rq.len; uresp.db_page_addr = dev->nic_info.unmapped_db + (srq->pd->id * dev->nic_info.db_page_size); @@ -2054,6 +2065,13 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } while (wr) { + if (qp->qp_type == IB_QPT_UD && + (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM)) { + *bad_wr = wr; + status = -EINVAL; + break; + } if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || wr->num_sge > qp->sq.max_sges) { *bad_wr = wr; @@ -2488,6 +2506,11 @@ static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp, *stop = true; expand = false; } + } else if (is_hw_sq_empty(qp)) { + /* Do nothing */ + expand = false; + *polled = false; + *stop = false; } else { *polled = true; expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status); @@ -2593,6 +2616,11 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, *stop = true; expand = false; } + } else if (is_hw_rq_empty(qp)) { + /* Do nothing */ + expand = false; + *polled = false; + *stop = false; } else { *polled = true; 
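The `ocrdma_post_send()` hunk above now rejects any work request other than SEND/SEND_WITH_IMM on a UD QP, setting `*bad_wr` and failing with `-EINVAL` before touching the queue. A standalone sketch of that validity check; the enums are simplified stand-ins for the ib_verbs ones.

```c
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the ib_verbs enums; only the cases the
 * check distinguishes are modelled here. */
enum wr_opcode { WR_SEND, WR_SEND_WITH_IMM, WR_RDMA_WRITE, WR_RDMA_READ };
enum qp_type { QPT_RC, QPT_UD };

/* Mirrors the ocrdma_post_send() hunk: UD (datagram) QPs may only post
 * SEND and SEND_WITH_IMM; anything else gets -EINVAL and *bad_wr set. */
static bool wr_valid_for_qp(enum qp_type qpt, enum wr_opcode op)
{
	if (qpt == QPT_UD)
		return op == WR_SEND || op == WR_SEND_WITH_IMM;
	return true;	/* connected QPs take the full opcode set */
}

int main(void)
{
	printf("UD + RDMA_WRITE ok? %d\n",
	       wr_valid_for_qp(QPT_UD, WR_RDMA_WRITE));	/* 0 */
	printf("UD + SEND       ok? %d\n",
	       wr_valid_for_qp(QPT_UD, WR_SEND));	/* 1 */
	return 0;
}
```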
expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status); @@ -2818,11 +2846,9 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) if (cq->first_arm) { ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0); cq->first_arm = false; - goto skip_defer; } - cq->deferred_arm = true; -skip_defer: + cq->deferred_arm = true; cq->deferred_sol = sol_needed; spin_unlock_irqrestore(&cq->cq_lock, flags); diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 799a0c3bffc4..6abd3ed3cd51 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -193,6 +193,7 @@ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) struct qib_qp_iter *iter; loff_t n = *pos; + rcu_read_lock(); iter = qib_qp_iter_init(s->private); if (!iter) return NULL; @@ -224,7 +225,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) { - /* nothing for now */ + rcu_read_unlock(); } static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index cab610ccd50e..81854586c081 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -89,7 +89,6 @@ static int create_file(const char *name, umode_t mode, { int error; - *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(*dentry)) diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 8d3c78ddc906..729da39c49ed 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1222,7 +1222,7 @@ static int qib_init_one(struct pci_dev *, const struct pci_device_id *); #define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " -static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { +static const struct pci_device_id qib_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) }, diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 22c720e5740d..636be117b578 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2476,7 +2476,7 @@ int qib_create_agents(struct qib_ibdev *dev) ibp = &dd->pport[p].ibport_data; agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI, NULL, 0, send_handler, - NULL, NULL); + NULL, NULL, 0); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto err; diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 7fcc150d603c..6ddc0264aad2 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1325,7 +1325,6 @@ int qib_qp_iter_next(struct qib_qp_iter *iter) struct qib_qp *pqp = iter->qp; struct qib_qp *qp; - rcu_read_lock(); for (; n < dev->qp_table_size; n++) { if (pqp) qp = rcu_dereference(pqp->next); @@ -1333,18 +1332,11 @@ int qib_qp_iter_next(struct qib_qp_iter *iter) qp = rcu_dereference(dev->qp_table[n]); pqp = qp; if (qp) { - if (iter->qp) - atomic_dec(&iter->qp->refcount); - atomic_inc(&qp->refcount); - rcu_read_unlock(); iter->qp = qp; iter->n = n; return 0; } } - rcu_read_unlock(); - if (iter->qp) - atomic_dec(&iter->qp->refcount); return ret; } diff --git 
a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 2bc1d2b96298..74f90b2619f6 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -52,7 +52,7 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages, * Call with current->mm->mmap_sem held. */ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, - struct page **p, struct vm_area_struct **vma) + struct page **p) { unsigned long lock_limit; size_t got; @@ -69,7 +69,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, ret = get_user_pages(current, current->mm, start_page + got * PAGE_SIZE, num_pages - got, 1, 1, - p + got, vma); + p + got, NULL); if (ret < 0) goto bail_release; } @@ -136,7 +136,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages, down_write(¤t->mm->mmap_sem); - ret = __qib_get_user_pages(start_page, num_pages, p, NULL); + ret = __qib_get_user_pages(start_page, num_pages, p); up_write(¤t->mm->mmap_sem); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index fb6d026f92cd..0d0f98695d53 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -490,7 +490,7 @@ out: /* Start of PCI section */ -static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = { +static const struct pci_device_id usnic_ib_pci_ids[] = { {PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)}, {0,} }; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 801a1d6937e4..417de1f32960 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -507,7 +507,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev) if (err) goto out_free_dev; - if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) { + if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) { usnic_err("IOMMU of %s does not support cache coherency\n", dev_name(dev)); err = -EINVAL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index c639f90cfda4..d7562beb5423 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -86,7 +86,6 @@ enum { IPOIB_FLAG_INITIALIZED = 1, IPOIB_FLAG_ADMIN_UP = 2, IPOIB_PKEY_ASSIGNED = 3, - IPOIB_PKEY_STOP = 4, IPOIB_FLAG_SUBINTERFACE = 5, IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, @@ -132,6 +131,12 @@ struct ipoib_cb { u8 hwaddr[INFINIBAND_ALEN]; }; +static inline struct ipoib_cb *ipoib_skb_cb(const struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb)); + return (struct ipoib_cb *)skb->cb; +} + /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ struct ipoib_mcast { struct ib_sa_mcmember_rec mcmember; @@ -312,7 +317,6 @@ struct ipoib_dev_priv { struct list_head multicast_list; struct rb_root multicast_tree; - struct delayed_work pkey_poll_task; struct delayed_work mcast_task; struct work_struct carrier_on_task; struct work_struct flush_light; @@ -473,10 +477,11 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); -int ipoib_ib_dev_open(struct net_device *dev); +int ipoib_ib_dev_open(struct net_device *dev, int flush); int ipoib_ib_dev_up(struct net_device *dev); int ipoib_ib_dev_down(struct net_device *dev, int 
flush); int ipoib_ib_dev_stop(struct net_device *dev, int flush); +void ipoib_pkey_dev_check_presence(struct net_device *dev); int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); void ipoib_dev_cleanup(struct net_device *dev); @@ -532,8 +537,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf); void ipoib_setup(struct net_device *dev); -void ipoib_pkey_poll(struct work_struct *work); -int ipoib_pkey_dev_delay_open(struct net_device *dev); +void ipoib_pkey_open(struct ipoib_dev_priv *priv); void ipoib_drain_cq(struct net_device *dev); void ipoib_set_ethtool_ops(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 50061854616e..6bd5740e2691 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -281,10 +281,8 @@ void ipoib_delete_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - if (priv->mcg_dentry) - debugfs_remove(priv->mcg_dentry); - if (priv->path_dentry) - debugfs_remove(priv->path_dentry); + debugfs_remove(priv->mcg_dentry); + debugfs_remove(priv->path_dentry); } int ipoib_register_debugfs(void) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 6a7003ddb0be..72626c348174 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -664,17 +664,18 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx) drain_tx_cq((struct net_device *)ctx); } -int ipoib_ib_dev_open(struct net_device *dev) +int ipoib_ib_dev_open(struct net_device *dev, int flush) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) { - ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey); - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + ipoib_pkey_dev_check_presence(dev); + + if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { + ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey, + (!(priv->pkey & 0x7fff) ? 
"Invalid" : "not found")); return -1; } - set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = ipoib_init_qp(dev); if (ret) { @@ -705,16 +706,17 @@ int ipoib_ib_dev_open(struct net_device *dev) dev_stop: if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) napi_enable(&priv->napi); - ipoib_ib_dev_stop(dev, 1); + ipoib_ib_dev_stop(dev, flush); return -1; } -static void ipoib_pkey_dev_check_presence(struct net_device *dev) +void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - u16 pkey_index = 0; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) + if (!(priv->pkey & 0x7fff) || + ib_find_pkey(priv->ca, priv->port, priv->pkey, + &priv->pkey_index)) clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); else set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); @@ -745,14 +747,6 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush) clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); netif_carrier_off(dev); - /* Shutdown the P_Key thread if still active */ - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { - mutex_lock(&pkey_mutex); - set_bit(IPOIB_PKEY_STOP, &priv->flags); - cancel_delayed_work_sync(&priv->pkey_poll_task); - mutex_unlock(&pkey_mutex); - } - ipoib_mcast_stop_thread(dev, flush); ipoib_mcast_dev_flush(dev); @@ -924,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) (unsigned long) dev); if (dev->flags & IFF_UP) { - if (ipoib_ib_dev_open(dev)) { + if (ipoib_ib_dev_open(dev, 1)) { ipoib_transport_dev_cleanup(dev); return -ENODEV; } @@ -966,13 +960,27 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv) return 1; } +/* + * returns 0 if pkey value was found in a different slot. + */ +static inline int update_child_pkey(struct ipoib_dev_priv *priv) +{ + u16 old_index = priv->pkey_index; + + priv->pkey_index = 0; + ipoib_pkey_dev_check_presence(priv->dev); + + if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) && + (old_index == priv->pkey_index)) + return 1; + return 0; +} static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, enum ipoib_flush_level level) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; - u16 new_index; int result; down_read(&priv->vlan_rwsem); @@ -986,16 +994,20 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, up_read(&priv->vlan_rwsem); - if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) { - /* for non-child devices must check/update the pkey value here */ - if (level == IPOIB_FLUSH_HEAVY && - !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) - update_parent_pkey(priv); + if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && + level != IPOIB_FLUSH_HEAVY) { ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); return; } if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { + /* interface is down. update pkey and leave. 
*/ + if (level == IPOIB_FLUSH_HEAVY) { + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) + update_parent_pkey(priv); + else + update_child_pkey(priv); + } ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n"); return; } @@ -1005,20 +1017,13 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, * (parent) devices should always takes what present in pkey index 0 */ if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); - ipoib_ib_dev_down(dev, 0); - ipoib_ib_dev_stop(dev, 0); - if (ipoib_pkey_dev_delay_open(dev)) - return; - } - /* restart QP only if P_Key index is changed */ - if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) && - new_index == priv->pkey_index) { + result = update_child_pkey(priv); + if (result) { + /* restart QP only if P_Key index is changed */ ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n"); return; } - priv->pkey_index = new_index; + } else { result = update_parent_pkey(priv); /* restart QP only if P_Key value changed */ @@ -1038,8 +1043,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev, 0); if (level == IPOIB_FLUSH_HEAVY) { - ipoib_ib_dev_stop(dev, 0); - ipoib_ib_dev_open(dev); + if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) + ipoib_ib_dev_stop(dev, 0); + if (ipoib_ib_dev_open(dev, 0) != 0) + return; + if (netif_queue_stopped(dev)) + netif_start_queue(dev); } /* @@ -1094,54 +1103,4 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) ipoib_transport_dev_cleanup(dev); } -/* - * Delayed P_Key Assigment Interim Support - * - * The following is initial implementation of delayed P_Key assigment - * mechanism. It is using the same approach implemented for the multicast - * group join. The single goal of this implementation is to quickly address - * Bug #2507. This implementation will probably be removed when the P_Key - * change async notification is available. 
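The update_child_pkey() helper added above only reports "unchanged" when the P_Key is still present at its old table index, which is what lets the flush path skip an unnecessary QP restart. A minimal userspace model of that idea (stand-in table, types and values, not the driver code):

#include <stdio.h>

#define PKEY_TABLE_LEN 4

static int find_pkey(const unsigned short *tbl, unsigned short pkey)
{
    for (int i = 0; i < PKEY_TABLE_LEN; i++)
        if ((tbl[i] & 0x7fff) == (pkey & 0x7fff)) /* compare ignoring the membership bit */
            return i;
    return -1;
}

/* returns 1 if the pkey kept its slot (no QP restart needed), 0 otherwise */
static int update_child_pkey(const unsigned short *tbl, unsigned short pkey,
                             int *pkey_index)
{
    int old_index = *pkey_index;
    int new_index = find_pkey(tbl, pkey);

    if (new_index < 0)
        return 0;               /* pkey vanished: not assigned */
    *pkey_index = new_index;
    return new_index == old_index;
}

int main(void)
{
    unsigned short tbl[PKEY_TABLE_LEN] = { 0xffff, 0x8001, 0x8002, 0 };
    int idx = 2;

    printf("unchanged=%d idx=%d\n", update_child_pkey(tbl, 0x8002, &idx), idx);
    return 0;
}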
- */ - -void ipoib_pkey_poll(struct work_struct *work) -{ - struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, pkey_poll_task.work); - struct net_device *dev = priv->dev; - - ipoib_pkey_dev_check_presence(dev); - - if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) - ipoib_open(dev); - else { - mutex_lock(&pkey_mutex); - if (!test_bit(IPOIB_PKEY_STOP, &priv->flags)) - queue_delayed_work(ipoib_workqueue, - &priv->pkey_poll_task, - HZ); - mutex_unlock(&pkey_mutex); - } -} - -int ipoib_pkey_dev_delay_open(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - - /* Look for the interface pkey value in the IB Port P_Key table and */ - /* set the interface pkey assigment flag */ - ipoib_pkey_dev_check_presence(dev); - /* P_Key value not assigned yet - start polling */ - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { - mutex_lock(&pkey_mutex); - clear_bit(IPOIB_PKEY_STOP, &priv->flags); - queue_delayed_work(ipoib_workqueue, - &priv->pkey_poll_task, - HZ); - mutex_unlock(&pkey_mutex); - return 1; - } - - return 0; -} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4e675f4fecc9..58b5aa3b6f2d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -108,11 +108,11 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); - if (ipoib_pkey_dev_delay_open(dev)) - return 0; - - if (ipoib_ib_dev_open(dev)) + if (ipoib_ib_dev_open(dev, 1)) { + if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) + return 0; goto err_disable; + } if (ipoib_ib_dev_up(dev)) goto err_stop; @@ -716,7 +716,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; - struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; + struct ipoib_cb *cb = ipoib_skb_cb(skb); struct ipoib_header *header; unsigned long flags; @@ -813,7 +813,7 @@ static int ipoib_hard_header(struct sk_buff *skb, const void *daddr, const void *saddr, unsigned len) { struct ipoib_header *header; - struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; + struct ipoib_cb *cb = ipoib_skb_cb(skb); header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -1364,7 +1364,7 @@ void ipoib_setup(struct net_device *dev) dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = (NETIF_F_VLAN_CHALLENGED | NETIF_F_HIGHDMA); - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + netif_keep_dst(dev); memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); @@ -1379,7 +1379,6 @@ void ipoib_setup(struct net_device *dev) INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); - INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index d4e005720d01..ffb83b5f7e80 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -529,21 +529,13 @@ void ipoib_mcast_join_task(struct work_struct *work) port_attr.state); return; } + priv->local_lid = port_attr.lid; if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) ipoib_warn(priv, "ib_query_gid() failed\n"); else memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); - { - struct 
ib_port_attr attr; - - if (!ib_query_port(priv->ca, priv->port, &attr)) - priv->local_lid = attr.lid; - else - ipoib_warn(priv, "ib_query_port failed\n"); - } - if (!priv->broadcast) { struct ipoib_mcast *broadcast; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index eb7973957a6e..f42ab14105ac 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -83,7 +83,7 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); int iser_debug_level = 0; bool iser_pi_enable = false; -int iser_pi_guard = 0; +int iser_pi_guard = 1; MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover"); MODULE_LICENSE("Dual BSD/GPL"); @@ -97,14 +97,24 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644); MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); module_param_named(pi_guard, iser_pi_guard, int, 0644); -MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)"); +MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)"); static struct workqueue_struct *release_wq; struct iser_global ig; +/* + * iscsi_iser_recv() - Process a successful recv completion + * @conn: iscsi connection + * @hdr: iscsi header + * @rx_data: buffer containing receive data payload + * @rx_data_len: length of rx_data + * + * Notes: In case of data length errors or iscsi PDU completion failures + * this routine will signal the iscsi layer of connection failure. + */ void -iscsi_iser_recv(struct iscsi_conn *conn, - struct iscsi_hdr *hdr, char *rx_data, int rx_data_len) +iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, + char *rx_data, int rx_data_len) { int rc = 0; int datalen; @@ -135,20 +145,30 @@ error: iscsi_conn_failure(conn, rc); } -static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) +/** + * iscsi_iser_pdu_alloc() - allocate an iscsi-iser PDU + * @task: iscsi task + * @opcode: iscsi command opcode + * + * Notes: This routine can't fail; it just assigns the iscsi task + * hdr and max hdr size. + */ +static int +iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) { struct iscsi_iser_task *iser_task = task->dd_data; task->hdr = (struct iscsi_hdr *)&iser_task->desc.iscsi_header; task->hdr_max = sizeof(iser_task->desc.iscsi_header); + return 0; } int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc) { - struct iser_conn *ib_conn = task->conn->dd_data; - struct iser_device *device = ib_conn->device; + struct iser_conn *iser_conn = task->conn->dd_data; + struct iser_device *device = iser_conn->ib_conn.device; struct iscsi_iser_task *iser_task = task->dd_data; u64 dma_addr; @@ -162,14 +182,18 @@ int iser_initialize_task_headers(struct iscsi_task *task, tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; tx_desc->tx_sg[0].lkey = device->mr->lkey; - iser_task->ib_conn = ib_conn; + iser_task->iser_conn = iser_conn; return 0; } + /** - * iscsi_iser_task_init - Initialize task + * iscsi_iser_task_init() - Initialize iscsi-iser task + * @task: iscsi task + * + * Initialize the task for the scsi command or mgmt command. + * + * Return: zero on success or -ENOMEM when failing + * to init task headers (dma mapping error).
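The iscsi_iser_pdu_alloc() hunk above relies on the iscsi header being embedded in the per-task TX descriptor, which is why the routine cannot fail: "allocation" is just pointer assignment into memory that already exists. A small standalone sketch of that layout (simplified stand-in types, not the kernel structures):

#include <stddef.h>
#include <stdio.h>

struct iscsi_hdr { unsigned char opcode; unsigned char rsvd[47]; };

struct tx_desc {
    struct iscsi_hdr iscsi_header;   /* preallocated together with the task */
};

struct task {
    struct tx_desc desc;
    struct iscsi_hdr *hdr;           /* points into desc, never NULL */
    size_t hdr_max;
};

static int pdu_alloc(struct task *t)
{
    t->hdr = &t->desc.iscsi_header;  /* no allocation call, nothing to fail */
    t->hdr_max = sizeof(t->desc.iscsi_header);
    return 0;
}

int main(void)
{
    struct task t;

    pdu_alloc(&t);
    printf("hdr_max=%zu\n", t.hdr_max);
    return 0;
}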
*/ static int iscsi_iser_task_init(struct iscsi_task *task) { @@ -191,7 +215,7 @@ iscsi_iser_task_init(struct iscsi_task *task) } /** - * iscsi_iser_mtask_xmit - xmit management(immediate) task + * iscsi_iser_mtask_xmit() - xmit management (immediate) task * @conn: iscsi connection * @task: task management task * @@ -249,6 +273,12 @@ iscsi_iser_task_xmit_unsol_data_exit: return error; } +/** + * iscsi_iser_task_xmit() - xmit iscsi-iser task + * @task: iscsi task + * + * Return: zero on success or escalates $error on failure. + */ static int iscsi_iser_task_xmit(struct iscsi_task *task) { @@ -286,12 +316,24 @@ iscsi_iser_task_xmit(struct iscsi_task *task) return error; } +/** + * iscsi_iser_cleanup_task() - cleanup an iscsi-iser task + * @task: iscsi task + * + * Notes: In case the RDMA device is already NULL (might have + * been removed in a DEVICE_REMOVAL CM event) it will bail out + * without doing dma unmapping. + */ static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *tx_desc = &iser_task->desc; - struct iser_conn *ib_conn = task->conn->dd_data; - struct iser_device *device = ib_conn->device; + struct iser_conn *iser_conn = task->conn->dd_data; + struct iser_device *device = iser_conn->ib_conn.device; + + /* DEVICE_REMOVAL event might have already released the device */ + if (!device) + return; ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -306,7 +348,20 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) } } -static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) +/** + * iscsi_iser_check_protection() - check protection information status of task. + * @task: iscsi task + * @sector: error sector if exists (output) + * + * Return: zero if no data-integrity errors have occurred + * 0x1: data-integrity error occurred in the guard-block + * 0x2: data-integrity error occurred in the reference tag + * 0x3: data-integrity error occurred in the application tag + * + * In addition the error sector is marked. + */ +static u8 +iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) { struct iscsi_iser_task *iser_task = task->dd_data; @@ -318,8 +373,17 @@ static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) sector); } +/** + * iscsi_iser_conn_create() - create a new iscsi-iser connection + * @cls_session: iscsi class session + * @conn_idx: connection index within the session (for MCS) + * + * Return: iscsi_cls_conn when iscsi_conn_setup succeeds or NULL + * otherwise. + */ static struct iscsi_cls_conn * -iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) +iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, + uint32_t conn_idx) { struct iscsi_conn *conn; struct iscsi_cls_conn *cls_conn; @@ -338,14 +402,25 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) return cls_conn; } +/** + * iscsi_iser_conn_bind() - bind iscsi and iser connection structures + * @cls_session: iscsi class session + * @cls_conn: iscsi class connection + * @transport_eph: transport end-point handle + * @is_leading: indicate if this is the session leading connection (MCS) + * + * Return: zero on success, $error if iscsi_conn_bind fails and + * -EINVAL in case the end-point doesn't exist anymore or the iser + * connection state is not UP (teardown already started).
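The new conn_bind error path documented above refuses to bind once teardown has started, and it makes the decision while holding the connection state mutex so the state cannot change underneath it. A toy userspace model of that check (a pthread mutex standing in for the kernel mutex; types invented for illustration):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

enum conn_state { CONN_UP, CONN_TERMINATING, CONN_DOWN };

struct iser_conn {
    pthread_mutex_t state_mutex;
    enum conn_state state;
    void *iscsi_conn;
};

static int conn_bind(struct iser_conn *ic, void *iscsi_conn)
{
    int err = 0;

    pthread_mutex_lock(&ic->state_mutex);
    if (ic->state != CONN_UP) {
        err = -EINVAL;            /* teardown already started */
        goto out;
    }
    ic->iscsi_conn = iscsi_conn;  /* exchange pointers only while still UP */
out:
    pthread_mutex_unlock(&ic->state_mutex);
    return err;
}

int main(void)
{
    struct iser_conn ic = { PTHREAD_MUTEX_INITIALIZER, CONN_TERMINATING, NULL };

    printf("bind rc=%d\n", conn_bind(&ic, (void *)0x1));
    return 0;
}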
+ */ static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, - struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, + struct iscsi_cls_conn *cls_conn, + uint64_t transport_eph, int is_leading) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_session *session; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; struct iscsi_endpoint *ep; int error; @@ -361,56 +436,100 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, (unsigned long long)transport_eph); return -EINVAL; } - ib_conn = ep->dd_data; + iser_conn = ep->dd_data; + + mutex_lock(&iser_conn->state_mutex); + if (iser_conn->state != ISER_CONN_UP) { + error = -EINVAL; + iser_err("iser_conn %p state is %d, teardown started\n", + iser_conn, iser_conn->state); + goto out; + } - session = conn->session; - if (iser_alloc_rx_descriptors(ib_conn, session)) - return -ENOMEM; + error = iser_alloc_rx_descriptors(iser_conn, conn->session); + if (error) + goto out; /* binds the iSER connection retrieved from the previously * connected ep_handle to the iSCSI layer connection. exchanges * connection pointers */ - iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn); + iser_info("binding iscsi conn %p to iser_conn %p\n", conn, iser_conn); - conn->dd_data = ib_conn; - ib_conn->iscsi_conn = conn; + conn->dd_data = iser_conn; + iser_conn->iscsi_conn = conn; - return 0; +out: + mutex_unlock(&iser_conn->state_mutex); + return error; } +/** + * iscsi_iser_conn_start() - start iscsi-iser connection + * @cls_conn: iscsi class connection + * + * Notes: Here iser initializes (or re-initializes) stop_completion as + * from this point on iscsi must call conn_stop in session/connection + * teardown, so the iser transport must wait for it. + */ static int iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *iscsi_conn; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; iscsi_conn = cls_conn->dd_data; - ib_conn = iscsi_conn->dd_data; - reinit_completion(&ib_conn->stop_completion); + iser_conn = iscsi_conn->dd_data; + reinit_completion(&iser_conn->stop_completion); return iscsi_conn_start(cls_conn); } +/** + * iscsi_iser_conn_stop() - stop iscsi-iser connection + * @cls_conn: iscsi class connection + * @flag: indicate if recover or terminate (passed as is) + * + * Notes: Calling iscsi_conn_stop might theoretically race with + * DEVICE_REMOVAL event and dereference a previously freed RDMA device + * handle, so we call it under the iser state lock to protect against + * this kind of race. + */ static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iser_conn *ib_conn = conn->dd_data; + struct iser_conn *iser_conn = conn->dd_data; - iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn); - iscsi_conn_stop(cls_conn, flag); + iser_info("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn); /* * Userspace may have goofed up and not bound the connection or * might have only partially set up the connection.
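conn_start above re-arms stop_completion and conn_stop signals it, so the release path can wait until the stop has actually happened. A minimal userspace completion primitive showing that pairing (an assumed simplification, not the kernel's struct completion):

#include <pthread.h>
#include <stdio.h>

struct completion {
    pthread_mutex_t lock;
    pthread_cond_t cond;
    int done;
};

static void reinit_completion(struct completion *c) { c->done = 0; }

static void complete(struct completion *c)
{
    pthread_mutex_lock(&c->lock);
    c->done = 1;
    pthread_cond_broadcast(&c->cond);
    pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
    pthread_mutex_lock(&c->lock);
    while (!c->done)
        pthread_cond_wait(&c->cond, &c->lock);
    pthread_mutex_unlock(&c->lock);
}

int main(void)
{
    struct completion stop = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
    };

    reinit_completion(&stop);   /* conn_start: re-arm */
    complete(&stop);            /* conn_stop: signal */
    wait_for_completion(&stop); /* release path: wait */
    printf("stop completed\n");
    return 0;
}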
*/ - if (ib_conn) { + if (iser_conn) { + mutex_lock(&iser_conn->state_mutex); + iscsi_conn_stop(cls_conn, flag); + iser_conn_terminate(iser_conn); + + /* unbind */ + iser_conn->iscsi_conn = NULL; conn->dd_data = NULL; - complete(&ib_conn->stop_completion); + + complete(&iser_conn->stop_completion); + mutex_unlock(&iser_conn->state_mutex); + } else { + iscsi_conn_stop(cls_conn, flag); } } -static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) +/** + * iscsi_iser_session_destroy() - destroy iscsi-iser session + * @cls_session: iscsi class session + * + * Removes and frees the iscsi host. + */ +static void +iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); @@ -430,6 +549,16 @@ iser_dif_prot_caps(int prot_caps) SHOST_DIX_TYPE3_PROTECTION : 0); } +/** + * iscsi_iser_session_create() - create an iscsi-iser session + * @ep: iscsi end-point handle + * @cmds_max: maximum commands in this session + * @qdepth: session command queue depth + * @initial_cmdsn: initiator command sequence number + * + * Allocates and adds a scsi host, exposes DIF support if it + * exists, and sets up an iscsi session. + */ static struct iscsi_cls_session * iscsi_iser_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, uint16_t qdepth, @@ -438,7 +567,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct Scsi_Host *shost; - struct iser_conn *ib_conn = NULL; + struct iser_conn *iser_conn = NULL; + struct ib_conn *ib_conn; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) @@ -455,7 +585,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, * the leading conn's ep so this will be NULL; */ if (ep) { - ib_conn = ep->dd_data; + iser_conn = ep->dd_data; + ib_conn = &iser_conn->ib_conn; if (ib_conn->pi_support) { u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap; @@ -467,8 +598,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } } - if (iscsi_host_add(shost, - ep ? ib_conn->device->ib_device->dma_device : NULL)) + if (iscsi_host_add(shost, ep ? + ib_conn->device->ib_device->dma_device : NULL)) goto free_host; if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) { @@ -540,6 +671,13 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, return 0; } +/** + * iscsi_iser_conn_get_stats() - get iscsi connection statistics + * @cls_conn: iscsi class connection + * @stats: iscsi stats to output + * + * Output connection statistics.
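iser_dif_prot_caps(), referenced in the session_create hunk above, folds the device's signature capabilities into scsi-host protection flags. A rough standalone model of the mapping shape (all bit and flag values below are invented placeholders; only the prot_cap-bits-to-host-flags structure matches):

#include <stdio.h>

#define PROT_CAP_T10_DIF_TYPE1 (1 << 0)
#define PROT_CAP_T10_DIF_TYPE2 (1 << 1)
#define PROT_CAP_T10_DIF_TYPE3 (1 << 2)

#define SHOST_DIF_TYPE1 (1 << 0)
#define SHOST_DIF_TYPE3 (1 << 2)
#define SHOST_DIX_TYPE1 (1 << 3)
#define SHOST_DIX_TYPE3 (1 << 5)

/* translate device signature capabilities into host protection flags */
static unsigned dif_prot_caps(unsigned prot_caps)
{
    unsigned flags = 0;

    if (prot_caps & PROT_CAP_T10_DIF_TYPE1)
        flags |= SHOST_DIF_TYPE1 | SHOST_DIX_TYPE1;
    if (prot_caps & PROT_CAP_T10_DIF_TYPE3)
        flags |= SHOST_DIF_TYPE3 | SHOST_DIX_TYPE3;
    return flags;
}

int main(void)
{
    printf("flags=0x%x\n", dif_prot_caps(PROT_CAP_T10_DIF_TYPE1));
    return 0;
}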
+ */ static void iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats) { @@ -568,18 +706,18 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, enum iscsi_param param, char *buf) { - struct iser_conn *ib_conn = ep->dd_data; + struct iser_conn *iser_conn = ep->dd_data; int len; switch (param) { case ISCSI_PARAM_CONN_PORT: case ISCSI_PARAM_CONN_ADDRESS: - if (!ib_conn || !ib_conn->cma_id) + if (!iser_conn || !iser_conn->ib_conn.cma_id) return -ENOTCONN; return iscsi_conn_get_addr_param((struct sockaddr_storage *) - &ib_conn->cma_id->route.addr.dst_addr, - param, buf); + &iser_conn->ib_conn.cma_id->route.addr.dst_addr, + param, buf); break; default: return -ENOSYS; @@ -588,48 +726,85 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, return len; } +/** + * iscsi_iser_ep_connect() - Initiate iSER connection establishment + * @shost: scsi_host + * @dst_addr: destination address + * @non_blocking: indicate if routine can block + * + * Allocate an iscsi endpoint, an iser_conn structure and bind them. + * After that start RDMA connection establishment via rdma_cm. We + * don't allocate iser_conn embedded in iscsi_endpoint since in teardown + * the endpoint will be destroyed at ep_disconnect while iser_conn will + * clean up its resources asynchronously. + * + * Return: iscsi_endpoint created by the iscsi layer or ERR_PTR(error) + * on failure. + */ static struct iscsi_endpoint * iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, int non_blocking) { int err; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; struct iscsi_endpoint *ep; - ep = iscsi_create_endpoint(sizeof(*ib_conn)); + ep = iscsi_create_endpoint(0); if (!ep) return ERR_PTR(-ENOMEM); - ib_conn = ep->dd_data; - ib_conn->ep = ep; - iser_conn_init(ib_conn); + iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL); + if (!iser_conn) { + err = -ENOMEM; + goto failure; + } + + ep->dd_data = iser_conn; + iser_conn->ep = ep; + iser_conn_init(iser_conn); - err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, - non_blocking); + err = iser_connect(iser_conn, NULL, dst_addr, non_blocking); if (err) - return ERR_PTR(err); + goto failure; return ep; +failure: + iscsi_destroy_endpoint(ep); + return ERR_PTR(err); } +/** + * iscsi_iser_ep_poll() - poll for iser connection establishment to complete + * @ep: iscsi endpoint (created at ep_connect) + * @timeout_ms: polling timeout allowed in ms. + * + * This routine boils down to waiting for up_completion signaling + * that the cma_id got a CONNECTED event. + * + * Return: 1 if connection establishment succeeded, 0 if timeout expired + * (libiscsi retry will kick in) or -1 if interrupted by signal + * or more likely the iser connection state transitioned to TERMINATING or + * DOWN during the wait period.
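The ep_poll() return convention documented above maps the wait result onto the iscsi poll contract: positive means connected, zero means keep polling unless the connection already died, negative passes a signal through. A compact userspace sketch of just that mapping (states and types are stand-ins):

#include <stdio.h>

enum state { CONN_UP, CONN_TERMINATING, CONN_DOWN };

static int ep_poll_result(long wait_rc, enum state s)
{
    if (wait_rc > 0)
        return 1;               /* the equivalent of POLLOUT */
    if (wait_rc == 0 &&
        (s == CONN_TERMINATING || s == CONN_DOWN))
        return -1;              /* establishment failed */
    return (int)wait_rc;        /* 0: timeout (caller retries), <0: signal */
}

int main(void)
{
    printf("%d %d %d\n",
           ep_poll_result(1, CONN_UP),    /* connected */
           ep_poll_result(0, CONN_UP),    /* still pending */
           ep_poll_result(0, CONN_DOWN)); /* failed */
    return 0;
}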
+ */ static int iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; int rc; - ib_conn = ep->dd_data; - rc = wait_event_interruptible_timeout(ib_conn->wait, - ib_conn->state == ISER_CONN_UP, - msecs_to_jiffies(timeout_ms)); - + iser_conn = ep->dd_data; + rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion, + msecs_to_jiffies(timeout_ms)); /* if conn establishment failed, return error code to iscsi */ - if (!rc && - (ib_conn->state == ISER_CONN_TERMINATING || - ib_conn->state == ISER_CONN_DOWN)) - rc = -1; + if (rc == 0) { + mutex_lock(&iser_conn->state_mutex); + if (iser_conn->state == ISER_CONN_TERMINATING || + iser_conn->state == ISER_CONN_DOWN) + rc = -1; + mutex_unlock(&iser_conn->state_mutex); + } - iser_info("ib conn %p rc = %d\n", ib_conn, rc); + iser_info("ib conn %p rc = %d\n", iser_conn, rc); if (rc > 0) return 1; /* success, this is the equivalent of POLLOUT */ @@ -639,26 +814,43 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) return rc; /* signal */ } +/** + * iscsi_iser_ep_disconnect() - Initiate connection teardown process + * @ep: iscsi endpoint handle + * + * This routine does not block on iser and RDMA termination process + * completion, as we queue deferred work for iser/RDMA destruction + * and cleanup or actually call it immediately in case we didn't pass + * the iscsi conn bind/start stage, thus it is safe. + */ static void iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; + + iser_conn = ep->dd_data; + iser_info("ep %p iser conn %p state %d\n", + ep, iser_conn, iser_conn->state); - ib_conn = ep->dd_data; - iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state); - iser_conn_terminate(ib_conn); + mutex_lock(&iser_conn->state_mutex); + iser_conn_terminate(iser_conn); /* - * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop - * call and ISER_CONN_DOWN state before freeing the iser resources. - * otherwise we are safe to free resources immediately. + * if iser_conn and iscsi_conn are bound, we must wait for + * iscsi_conn_stop and flush errors completion before freeing + * the iser resources. Otherwise we are safe to free resources + * immediately.
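ep_disconnect above frees resources synchronously only when no iscsi connection was ever bound; otherwise it defers to the release workqueue so conn_stop can complete first. A toy model of that branch (invented types, plain function calls standing in for workqueue submission):

#include <stdbool.h>
#include <stdio.h>

struct conn { bool bound; };

static void conn_release(struct conn *c) { (void)c; printf("released now\n"); }
static void queue_release_work(struct conn *c) { (void)c; printf("release deferred\n"); }

static void ep_disconnect(struct conn *c)
{
    if (c->bound)
        queue_release_work(c);  /* conn_stop must run first; defer */
    else
        conn_release(c);        /* never bound: safe to free immediately */
}

int main(void)
{
    struct conn bound = { true }, unbound = { false };

    ep_disconnect(&bound);
    ep_disconnect(&unbound);
    return 0;
}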
*/ - if (ib_conn->iscsi_conn) { - INIT_WORK(&ib_conn->release_work, iser_release_work); - queue_work(release_wq, &ib_conn->release_work); + if (iser_conn->iscsi_conn) { + INIT_WORK(&iser_conn->release_work, iser_release_work); + queue_work(release_wq, &iser_conn->release_work); + mutex_unlock(&iser_conn->state_mutex); } else { - iser_conn_release(ib_conn); + iser_conn->state = ISER_CONN_DOWN; + mutex_unlock(&iser_conn->state_mutex); + iser_conn_release(iser_conn); } + iscsi_destroy_endpoint(ep); } static umode_t iser_attr_is_visible(int param_type, int param) @@ -819,7 +1011,7 @@ register_transport_failure: static void __exit iser_exit(void) { - struct iser_conn *ib_conn, *n; + struct iser_conn *iser_conn, *n; int connlist_empty; iser_dbg("Removing iSER datamover...\n"); @@ -832,8 +1024,9 @@ static void __exit iser_exit(void) if (!connlist_empty) { iser_err("Error cleanup stage completed but we still have iser " "connections, destroying them anyway.\n"); - list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) { - iser_conn_release(ib_conn); + list_for_each_entry_safe(iser_conn, n, &ig.connlist, + conn_list) { + iser_conn_release(iser_conn); } } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 97cd385bf7f7..cd4174ca9a76 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -69,39 +69,38 @@ #define DRV_NAME "iser" #define PFX DRV_NAME ": " -#define DRV_VER "1.4" +#define DRV_VER "1.4.8" -#define iser_dbg(fmt, arg...) \ - do { \ - if (iser_debug_level > 2) \ - printk(KERN_DEBUG PFX "%s:" fmt,\ - __func__ , ## arg); \ +#define iser_dbg(fmt, arg...) \ + do { \ + if (iser_debug_level > 2) \ + printk(KERN_DEBUG PFX "%s: " fmt,\ + __func__ , ## arg); \ } while (0) #define iser_warn(fmt, arg...) \ do { \ if (iser_debug_level > 0) \ - pr_warn(PFX "%s:" fmt, \ + pr_warn(PFX "%s: " fmt, \ __func__ , ## arg); \ } while (0) #define iser_info(fmt, arg...) \ do { \ if (iser_debug_level > 1) \ - pr_info(PFX "%s:" fmt, \ + pr_info(PFX "%s: " fmt, \ __func__ , ## arg); \ } while (0) #define iser_err(fmt, arg...) 
\ do { \ - printk(KERN_ERR PFX "%s:" fmt, \ + printk(KERN_ERR PFX "%s: " fmt, \ __func__ , ## arg); \ } while (0) #define SHIFT_4K 12 #define SIZE_4K (1ULL << SHIFT_4K) #define MASK_4K (~(SIZE_4K-1)) - /* support up to 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) #define ISER_DEF_XMIT_CMDS_DEFAULT 512 @@ -145,18 +144,32 @@ ISER_MAX_TX_MISC_PDUS + \ ISER_MAX_RX_MISC_PDUS) +#define ISER_WC_BATCH_COUNT 16 +#define ISER_SIGNAL_CMD_COUNT 32 + #define ISER_VER 0x10 #define ISER_WSV 0x08 #define ISER_RSV 0x04 #define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL +#define ISER_BEACON_WRID 0xfffffffffffffffeULL +/** + * struct iser_hdr - iSER header + * + * @flags: flags support (zbva, remote_inv) + * @rsvd: reserved + * @write_stag: write rkey + * @write_va: write virtual address + * @read_stag: read rkey + * @read_va: read virtual address + */ struct iser_hdr { u8 flags; u8 rsvd[3]; - __be32 write_stag; /* write rkey */ + __be32 write_stag; __be64 write_va; - __be32 read_stag; /* read rkey */ + __be32 read_stag; __be64 read_va; } __attribute__((packed)); @@ -179,7 +192,7 @@ struct iser_cm_hdr { /* Length of an object name string */ #define ISER_OBJECT_NAME_SIZE 64 -enum iser_ib_conn_state { +enum iser_conn_state { ISER_CONN_INIT, /* descriptor allocd, no conn */ ISER_CONN_PENDING, /* in the process of being established */ ISER_CONN_UP, /* up and running */ @@ -200,23 +213,42 @@ enum iser_data_dir { ISER_DIRS_NUM }; +/** + * struct iser_data_buf - iSER data buffer + * + * @buf: pointer to the sg list + * @size: num entries of this sg + * @data_len: total buffer byte len + * @dma_nents: returned by dma_map_sg + * @copy_buf: allocated copy buf for SGs unaligned + * for rdma which are copied + * @sg_single: SG-ified clone of a non SG SC or + * unaligned SG + */ struct iser_data_buf { - void *buf; /* pointer to the sg list */ - unsigned int size; /* num entries of this sg */ - unsigned long data_len; /* total data len */ - unsigned int dma_nents; /* returned by dma_map_sg */ - char *copy_buf; /* allocated copy buf for SGs unaligned * - * for rdma which are copied */ - struct scatterlist sg_single; /* SG-ified clone of a non SG SC or * - * unaligned SG */ + void *buf; + unsigned int size; + unsigned long data_len; + unsigned int dma_nents; + char *copy_buf; + struct scatterlist sg_single; }; /* fwd declarations */ struct iser_device; -struct iser_cq_desc; struct iscsi_iser_task; struct iscsi_endpoint; +/** + * struct iser_mem_reg - iSER memory registration info + * + * @lkey: MR local key + * @rkey: MR remote key + * @va: MR start address (buffer va) + * @len: MR length + * @mem_h: pointer to registration context (FMR/Fastreg) + * @is_mr: indicates whether we registered the buffer + */ struct iser_mem_reg { u32 lkey; u32 rkey; @@ -226,11 +258,20 @@ struct iser_mem_reg { int is_mr; }; +/** + * struct iser_regd_buf - iSER buffer registration desc + * + * @reg: memory registration info + * @virt_addr: virtual address of buffer + * @device: reference to iser device + * @direction: dma direction (for dma_unmap) + * @data_size: data buffer size in bytes + */ struct iser_regd_buf { - struct iser_mem_reg reg; /* memory registration info */ + struct iser_mem_reg reg; void *virt_addr; - struct iser_device *device; /* device->device for dma_unmap */ - enum dma_data_direction direction; /* direction for dma_unmap */ + struct iser_device *device; + enum dma_data_direction direction; unsigned int data_size; }; @@ -240,19 +281,39 @@ enum iser_desc_type { ISCSI_TX_DATAOUT }; +/** + * struct
iser_tx_desc - iSER TX descriptor (for send wr_id) + * + * @iser_header: iser header + * @iscsi_header: iscsi header + * @type: command/control/dataout + * @dma_addr: header buffer dma_address + * @tx_sg: sg[0] points to iser/iscsi headers + * sg[1] optionally points to either of immediate data, + * unsolicited data-out or control + * @num_sge: number of sges used on this TX task + */ struct iser_tx_desc { struct iser_hdr iser_header; struct iscsi_hdr iscsi_header; enum iser_desc_type type; u64 dma_addr; - /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either - of immediate data, unsolicited data-out or control (login,text) */ struct ib_sge tx_sg[2]; int num_sge; }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ sizeof(u64) + sizeof(struct ib_sge))) +/** + * struct iser_rx_desc - iSER RX descriptor (for recv wr_id) + * + * @iser_header: iser header + * @iscsi_header: iscsi header + * @data: received data segment + * @dma_addr: receive buffer dma address + * @rx_sg: ib_sge of receive buffer + * @pad: for sense data TODO: Modify to maximum sense length supported + */ struct iser_rx_desc { struct iser_hdr iser_header; struct iscsi_hdr iscsi_header; @@ -265,25 +326,59 @@ struct iser_rx_desc { #define ISER_MAX_CQ 4 struct iser_conn; +struct ib_conn; struct iscsi_iser_task; +/** + * struct iser_comp - iSER completion context + * + * @device: pointer to device handle + * @cq: completion queue + * @wcs: work completion array + * @tasklet: Tasklet handle + * @active_qps: Number of active QPs attached + * to completion context + */ +struct iser_comp { + struct iser_device *device; + struct ib_cq *cq; + struct ib_wc wcs[ISER_WC_BATCH_COUNT]; + struct tasklet_struct tasklet; + int active_qps; +}; + +/** + * struct iser_device - iSER device handle + * + * @ib_device: RDMA device + * @pd: Protection Domain for this device + * @dev_attr: Device attributes container + * @mr: Global DMA memory region + * @event_handler: IB events handle routine + * @ig_list: entry in devices list + * @refcount: Reference counter, dominated by open iser connections + * @comps_used: Number of completion contexts used, Min between online + * cpus and device max completion vectors + * @comps: Dynamically allocated array of completion handlers + * Memory registration pool Function pointers (FMR or Fastreg): + * @iser_alloc_rdma_reg_res: Allocation of memory regions pool + * @iser_free_rdma_reg_res: Free of memory regions pool + * @iser_reg_rdma_mem: Memory registration routine + * @iser_unreg_rdma_mem: Memory deregistration routine + */ struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; struct ib_device_attr dev_attr; - struct ib_cq *rx_cq[ISER_MAX_CQ]; - struct ib_cq *tx_cq[ISER_MAX_CQ]; struct ib_mr *mr; - struct tasklet_struct cq_tasklet[ISER_MAX_CQ]; struct ib_event_handler event_handler; - struct list_head ig_list; /* entry in ig devices list */ + struct list_head ig_list; int refcount; - int cq_active_qps[ISER_MAX_CQ]; - int cqs_used; - struct iser_cq_desc *cq_desc; - int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn, + int comps_used; + struct iser_comp comps[ISER_MAX_CQ]; + int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn, unsigned cmds_max); - void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn); + void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn); int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task, @@ -301,76 +396,160 @@ enum iser_reg_indicator {
ISER_FASTREG_PROTECTED = 1 << 3, }; +/** + * struct iser_pi_context - Protection information context + * + * @prot_mr: protection memory region + * @prot_frpl: protection fastreg page list + * @sig_mr: signature feature enabled memory region + */ struct iser_pi_context { struct ib_mr *prot_mr; struct ib_fast_reg_page_list *prot_frpl; struct ib_mr *sig_mr; }; +/** + * struct fast_reg_descriptor - Fast registration descriptor + * + * @list: entry in connection fastreg pool + * @data_mr: data memory region + * @data_frpl: data fastreg page list + * @pi_ctx: protection information context + * @reg_indicators: fast registration indicators + */ struct fast_reg_descriptor { struct list_head list; - /* For fast registration - FRWR */ struct ib_mr *data_mr; struct ib_fast_reg_page_list *data_frpl; struct iser_pi_context *pi_ctx; - /* registration indicators container */ u8 reg_indicators; }; +/** + * struct ib_conn - Infiniband related objects + * + * @cma_id: rdma_cm connection manager handle + * @qp: Connection Queue-pair + * @post_recv_buf_count: post receive counter + * @rx_wr: receive work request for batch posts + * @device: reference to iser device + * @comp: iser completion context + * @pi_support: Indicate device T10-PI support + * @beacon: beacon send wr to signal all flush errors were drained + * @flush_comp: completes when all connection completions consumed + * @lock: protects fmr/fastreg pool + * @union.fmr: + * @pool: FMR pool for fast registrations + * @page_vec: page vector to hold mapped commands pages + * used for registration + * @union.fastreg: + * @pool: Fast registration descriptors pool for fast + * registrations + * @pool_size: Size of pool + */ +struct ib_conn { + struct rdma_cm_id *cma_id; + struct ib_qp *qp; + int post_recv_buf_count; + struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; + struct iser_device *device; + struct iser_comp *comp; + bool pi_support; + struct ib_send_wr beacon; + struct completion flush_comp; + spinlock_t lock; + union { + struct { + struct ib_fmr_pool *pool; + struct iser_page_vec *page_vec; + } fmr; + struct { + struct list_head pool; + int pool_size; + } fastreg; + }; +}; + +/** + * struct iser_conn - iSER connection context + * + * @ib_conn: connection RDMA resources + * @iscsi_conn: link to matching iscsi connection + * @ep: transport handle + * @state: connection logical state + * @qp_max_recv_dtos: maximum number of data outs, corresponds + * to max number of post recvs + * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1) + * @min_posted_rx: (qp_max_recv_dtos >> 2) + * @name: connection peer portal + * @release_work: deferred work for release job + * @state_mutex: protects the iser connection state + * @stop_completion: conn_stop completion + * @ib_completion: RDMA cleanup completion + * @up_completion: connection establishment completed + * (state is ISER_CONN_UP) + * @conn_list: entry in ig conn list + * @login_buf: login data buffer (stores login parameters) + * @login_req_buf: login request buffer + * @login_req_dma: login request buffer dma address + * @login_resp_buf: login response buffer + * @login_resp_dma: login response buffer dma address + * @rx_desc_head: head of rx_descs cyclic buffer + * @rx_descs: rx buffers array (cyclic buffer) + * @num_rx_descs: number of rx descriptors + */ struct iser_conn { + struct ib_conn ib_conn; struct iscsi_conn *iscsi_conn; struct iscsi_endpoint *ep; - enum iser_ib_conn_state state; /* rdma connection state */ - atomic_t refcount; - spinlock_t lock; /* used for state changes */ - struct iser_device
*device; /* device context */ - struct rdma_cm_id *cma_id; /* CMA ID */ - struct ib_qp *qp; /* QP */ - wait_queue_head_t wait; /* waitq for conn/disconn */ - unsigned qp_max_recv_dtos; /* num of rx buffers */ - unsigned qp_max_recv_dtos_mask; /* above minus 1 */ - unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ - int post_recv_buf_count; /* posted rx count */ - atomic_t post_send_buf_count; /* posted tx count */ + enum iser_conn_state state; + unsigned qp_max_recv_dtos; + unsigned qp_max_recv_dtos_mask; + unsigned min_posted_rx; char name[ISER_OBJECT_NAME_SIZE]; struct work_struct release_work; + struct mutex state_mutex; struct completion stop_completion; - struct list_head conn_list; /* entry in ig conn list */ + struct completion ib_completion; + struct completion up_completion; + struct list_head conn_list; char *login_buf; char *login_req_buf, *login_resp_buf; u64 login_req_dma, login_resp_dma; unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; - struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; - bool pi_support; - - /* Connection memory registration pool */ - union { - struct { - struct ib_fmr_pool *pool; /* pool of IB FMRs */ - struct iser_page_vec *page_vec; /* represents SG to fmr maps* - * maps serialized as tx is*/ - } fmr; - struct { - struct list_head pool; - int pool_size; - } fastreg; - }; + u32 num_rx_descs; }; +/** + * struct iscsi_iser_task - iser task context + * + * @desc: TX descriptor + * @iser_conn: link to iser connection + * @status: current task status + * @sc: link to scsi command + * @command_sent: indicate if command was sent + * @dir: iser data direction + * @rdma_regd: task rdma registration desc + * @data: iser data buffer desc + * @data_copy: iser data copy buffer desc (bounce buffer) + * @prot: iser protection buffer desc + * @prot_copy: iser protection copy buffer desc (bounce buffer) + */ struct iscsi_iser_task { struct iser_tx_desc desc; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; enum iser_task_status status; struct scsi_cmnd *sc; - int command_sent; /* set if command sent */ - int dir[ISER_DIRS_NUM]; /* set if dir use*/ - struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ - struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ - struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. 
copy */ - struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */ - struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */ + int command_sent; + int dir[ISER_DIRS_NUM]; + struct iser_regd_buf rdma_regd[ISER_DIRS_NUM]; + struct iser_data_buf data[ISER_DIRS_NUM]; + struct iser_data_buf data_copy[ISER_DIRS_NUM]; + struct iser_data_buf prot[ISER_DIRS_NUM]; + struct iser_data_buf prot_copy[ISER_DIRS_NUM]; }; struct iser_page_vec { @@ -380,17 +559,20 @@ struct iser_page_vec { int data_size; }; -struct iser_cq_desc { - struct iser_device *device; - int cq_index; -}; - +/** + * struct iser_global: iSER global context + * + * @device_list_mutex: protects device_list + * @device_list: iser devices global list + * @connlist_mutex: protects connlist + * @connlist: iser connections global list + * @desc_cache: kmem cache for tx dataout + */ struct iser_global { - struct mutex device_list_mutex;/* */ - struct list_head device_list; /* all iSER devices */ + struct mutex device_list_mutex; + struct list_head device_list; struct mutex connlist_mutex; - struct list_head connlist; /* all iSER IB connections */ - + struct list_head connlist; struct kmem_cache *desc_cache; }; @@ -399,9 +581,6 @@ extern int iser_debug_level; extern bool iser_pi_enable; extern int iser_pi_guard; -/* allocate connection resources needed for rdma functionality */ -int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); - int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task); @@ -413,29 +592,30 @@ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_data *hdr); void iscsi_iser_recv(struct iscsi_conn *conn, - struct iscsi_hdr *hdr, - char *rx_data, - int rx_data_len); + struct iscsi_hdr *hdr, + char *rx_data, + int rx_data_len); -void iser_conn_init(struct iser_conn *ib_conn); +void iser_conn_init(struct iser_conn *iser_conn); -void iser_conn_release(struct iser_conn *ib_conn); +void iser_conn_release(struct iser_conn *iser_conn); -void iser_conn_terminate(struct iser_conn *ib_conn); +int iser_conn_terminate(struct iser_conn *iser_conn); void iser_release_work(struct work_struct *work); void iser_rcv_completion(struct iser_rx_desc *desc, - unsigned long dto_xfer_len, - struct iser_conn *ib_conn); + unsigned long dto_xfer_len, + struct ib_conn *ib_conn); -void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn); +void iser_snd_completion(struct iser_tx_desc *desc, + struct ib_conn *ib_conn); void iser_task_rdma_init(struct iscsi_iser_task *task); void iser_task_rdma_finalize(struct iscsi_iser_task *task); -void iser_free_rx_descriptors(struct iser_conn *ib_conn); +void iser_free_rx_descriptors(struct iser_conn *iser_conn); void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct iser_data_buf *mem, @@ -447,38 +627,40 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_connect(struct iser_conn *ib_conn, - struct sockaddr_in *src_addr, - struct sockaddr_in *dst_addr, - int non_blocking); +int iser_connect(struct iser_conn *iser_conn, + struct sockaddr *src_addr, + struct sockaddr *dst_addr, + int non_blocking); -int iser_reg_page_vec(struct iser_conn *ib_conn, +int iser_reg_page_vec(struct ib_conn *ib_conn, struct iser_page_vec *page_vec, - struct iser_mem_reg *mem_reg); + struct iser_mem_reg *mem_reg); void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); void iser_unreg_mem_fastreg(struct iscsi_iser_task 
*iser_task, enum iser_data_dir cmd_dir); -int iser_post_recvl(struct iser_conn *ib_conn); -int iser_post_recvm(struct iser_conn *ib_conn, int count); -int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc); +int iser_post_recvl(struct iser_conn *iser_conn); +int iser_post_recvm(struct iser_conn *iser_conn, int count); +int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, + bool signal); int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum iser_data_dir iser_dir, - enum dma_data_direction dma_dir); + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir); void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); -int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); -int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_fmr_pool(struct iser_conn *ib_conn); -int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_fastreg_pool(struct iser_conn *ib_conn); +int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, + struct iscsi_session *session); +int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max); +void iser_free_fmr_pool(struct ib_conn *ib_conn); +int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max); +void iser_free_fastreg_pool(struct ib_conn *ib_conn); u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 8d44a4060634..5a489ea63732 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -49,7 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->ib_conn->device; + struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -103,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->ib_conn->device; + struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -160,10 +160,10 @@ iser_prepare_write_cmd(struct iscsi_task *task, } /* creates a new tx descriptor and adds header regd buffer */ -static void iser_create_send_desc(struct iser_conn *ib_conn, +static void iser_create_send_desc(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc) { - struct iser_device *device = ib_conn->device; + struct iser_device *device = iser_conn->ib_conn.device; ib_dma_sync_single_for_cpu(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -179,103 +179,108 @@ static void iser_create_send_desc(struct iser_conn *ib_conn, } } -static void iser_free_login_buf(struct iser_conn *ib_conn) +static void iser_free_login_buf(struct iser_conn *iser_conn) { - if (!ib_conn->login_buf) + struct iser_device *device = iser_conn->ib_conn.device; + + if (!iser_conn->login_buf) return; - if (ib_conn->login_req_dma) - 
ib_dma_unmap_single(ib_conn->device->ib_device, - ib_conn->login_req_dma, + if (iser_conn->login_req_dma) + ib_dma_unmap_single(device->ib_device, + iser_conn->login_req_dma, ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - if (ib_conn->login_resp_dma) - ib_dma_unmap_single(ib_conn->device->ib_device, - ib_conn->login_resp_dma, + if (iser_conn->login_resp_dma) + ib_dma_unmap_single(device->ib_device, + iser_conn->login_resp_dma, ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->login_buf); + kfree(iser_conn->login_buf); /* make sure we never redo any unmapping */ - ib_conn->login_req_dma = 0; - ib_conn->login_resp_dma = 0; - ib_conn->login_buf = NULL; + iser_conn->login_req_dma = 0; + iser_conn->login_resp_dma = 0; + iser_conn->login_buf = NULL; } -static int iser_alloc_login_buf(struct iser_conn *ib_conn) +static int iser_alloc_login_buf(struct iser_conn *iser_conn) { - struct iser_device *device; + struct iser_device *device = iser_conn->ib_conn.device; int req_err, resp_err; - BUG_ON(ib_conn->device == NULL); + BUG_ON(device == NULL); - device = ib_conn->device; - - ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + + iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE, GFP_KERNEL); - if (!ib_conn->login_buf) + if (!iser_conn->login_buf) goto out_err; - ib_conn->login_req_buf = ib_conn->login_buf; - ib_conn->login_resp_buf = ib_conn->login_buf + + iser_conn->login_req_buf = iser_conn->login_buf; + iser_conn->login_resp_buf = iser_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; - ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_req_buf, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + iser_conn->login_req_dma = ib_dma_map_single(device->ib_device, + iser_conn->login_req_buf, + ISCSI_DEF_MAX_RECV_SEG_LEN, + DMA_TO_DEVICE); - ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_resp_buf, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device, + iser_conn->login_resp_buf, + ISER_RX_LOGIN_SIZE, + DMA_FROM_DEVICE); req_err = ib_dma_mapping_error(device->ib_device, - ib_conn->login_req_dma); + iser_conn->login_req_dma); resp_err = ib_dma_mapping_error(device->ib_device, - ib_conn->login_resp_dma); + iser_conn->login_resp_dma); if (req_err || resp_err) { if (req_err) - ib_conn->login_req_dma = 0; + iser_conn->login_req_dma = 0; if (resp_err) - ib_conn->login_resp_dma = 0; + iser_conn->login_resp_dma = 0; goto free_login_buf; } return 0; free_login_buf: - iser_free_login_buf(ib_conn); + iser_free_login_buf(iser_conn); out_err: iser_err("unable to alloc or map login buf\n"); return -ENOMEM; } -int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session) +int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, + struct iscsi_session *session) { int i, j; u64 dma_addr; struct iser_rx_desc *rx_desc; struct ib_sge *rx_sg; - struct iser_device *device = ib_conn->device; + struct ib_conn *ib_conn = &iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; - ib_conn->qp_max_recv_dtos = session->cmds_max; - ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ - ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2; + iser_conn->qp_max_recv_dtos = session->cmds_max; + iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ + iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2; if (device->iser_alloc_rdma_reg_res(ib_conn, 
session->scsi_cmds_max)) goto create_rdma_reg_res_failed; - if (iser_alloc_login_buf(ib_conn)) + if (iser_alloc_login_buf(iser_conn)) goto alloc_login_buf_fail; - ib_conn->rx_descs = kmalloc(session->cmds_max * + iser_conn->num_rx_descs = session->cmds_max; + iser_conn->rx_descs = kmalloc(iser_conn->num_rx_descs * sizeof(struct iser_rx_desc), GFP_KERNEL); - if (!ib_conn->rx_descs) + if (!iser_conn->rx_descs) goto rx_desc_alloc_fail; - rx_desc = ib_conn->rx_descs; + rx_desc = iser_conn->rx_descs; - for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) { + for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) { dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(device->ib_device, dma_addr)) @@ -289,18 +294,18 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *s rx_sg->lkey = device->mr->lkey; } - ib_conn->rx_desc_head = 0; + iser_conn->rx_desc_head = 0; return 0; rx_desc_dma_map_failed: - rx_desc = ib_conn->rx_descs; + rx_desc = iser_conn->rx_descs; for (j = 0; j < i; j++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->rx_descs); - ib_conn->rx_descs = NULL; + kfree(iser_conn->rx_descs); + iser_conn->rx_descs = NULL; rx_desc_alloc_fail: - iser_free_login_buf(ib_conn); + iser_free_login_buf(iser_conn); alloc_login_buf_fail: device->iser_free_rdma_reg_res(ib_conn); create_rdma_reg_res_failed: @@ -308,33 +313,35 @@ create_rdma_reg_res_failed: return -ENOMEM; } -void iser_free_rx_descriptors(struct iser_conn *ib_conn) +void iser_free_rx_descriptors(struct iser_conn *iser_conn) { int i; struct iser_rx_desc *rx_desc; + struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_device *device = ib_conn->device; - if (!ib_conn->rx_descs) + if (!iser_conn->rx_descs) goto free_login_buf; if (device->iser_free_rdma_reg_res) device->iser_free_rdma_reg_res(ib_conn); - rx_desc = ib_conn->rx_descs; - for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) + rx_desc = iser_conn->rx_descs; + for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->rx_descs); + kfree(iser_conn->rx_descs); /* make sure we never redo any unmapping */ - ib_conn->rx_descs = NULL; + iser_conn->rx_descs = NULL; free_login_buf: - iser_free_login_buf(ib_conn); + iser_free_login_buf(iser_conn); } static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) { - struct iser_conn *ib_conn = conn->dd_data; + struct iser_conn *iser_conn = conn->dd_data; + struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iscsi_session *session = conn->session; iser_dbg("req op %x flags %x\n", req->opcode, req->flags); @@ -343,34 +350,37 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) return 0; /* - * Check that there is one posted recv buffer (for the last login - * response) and no posted send buffers left - they must have been - * consumed during previous login phases. + * Check that there is one posted recv buffer + * (for the last login response). 
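The receive-ring bookkeeping in iser_alloc_rx_descriptors() above leans on session->cmds_max being a power of two (the inline "cmds_max is 2^N" comment says as much): the wrap mask is simply cmds_max - 1, so the ring head can advance with a single AND instead of a modulo. A small standalone sketch of that invariant; every name in it is invented for the demo:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned int cmds_max = 128;      /* must be 2^N, per the driver's comment */
    unsigned int mask = cmds_max - 1; /* qp_max_recv_dtos_mask analogue */
    unsigned int head = 0;

    assert((cmds_max & (cmds_max - 1)) == 0); /* power-of-two check */

    for (int i = 0; i < 300; i++)
        head = (head + 1) & mask;     /* wraps without a division */

    printf("head after 300 advances: %u\n", head); /* 300 mod 128 == 44 */
    return 0;
}

iser_post_recvm() later in this patch advances rx_desc_head with exactly this masked increment.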
*/ WARN_ON(ib_conn->post_recv_buf_count != 1); - WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0); if (session->discovery_sess) { iser_info("Discovery session, re-using login RX buffer\n"); return 0; } else iser_info("Normal session, posting batch of RX %d buffers\n", - ib_conn->min_posted_rx); + iser_conn->min_posted_rx); /* Initial post receive buffers */ - if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx)) + if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx)) return -ENOMEM; return 0; } +static inline bool iser_signal_comp(int sig_count) +{ + return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0); +} + /** * iser_send_command - send command PDU */ int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iser_conn *ib_conn = conn->dd_data; + struct iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; unsigned long edtl; int err; @@ -378,12 +388,13 @@ int iser_send_command(struct iscsi_conn *conn, struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; struct scsi_cmnd *sc = task->sc; struct iser_tx_desc *tx_desc = &iser_task->desc; + static unsigned sig_count; edtl = ntohl(hdr->data_length); /* build the tx desc regd header and add it to the tx desc dto */ tx_desc->type = ISCSI_TX_SCSI_COMMAND; - iser_create_send_desc(ib_conn, tx_desc); + iser_create_send_desc(iser_conn, tx_desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) { data_buf = &iser_task->data[ISER_DIR_IN]; @@ -423,7 +434,8 @@ int iser_send_command(struct iscsi_conn *conn, iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(ib_conn, tx_desc); + err = iser_post_send(&iser_conn->ib_conn, tx_desc, + iser_signal_comp(++sig_count)); if (!err) return 0; @@ -439,7 +451,7 @@ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task, struct iscsi_data *hdr) { - struct iser_conn *ib_conn = conn->dd_data; + struct iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *tx_desc = NULL; struct iser_regd_buf *regd_buf; @@ -488,7 +500,7 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(ib_conn, tx_desc); + err = iser_post_send(&iser_conn->ib_conn, tx_desc, true); if (!err) return 0; @@ -501,7 +513,7 @@ send_data_out_error: int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iser_conn *ib_conn = conn->dd_data; + struct iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *mdesc = &iser_task->desc; unsigned long data_seg_len; @@ -510,9 +522,9 @@ int iser_send_control(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; - iser_create_send_desc(ib_conn, mdesc); + iser_create_send_desc(iser_conn, mdesc); - device = ib_conn->device; + device = iser_conn->ib_conn.device; data_seg_len = ntoh24(task->hdr->dlength); @@ -524,16 +536,16 @@ int iser_send_control(struct iscsi_conn *conn, } ib_dma_sync_single_for_cpu(device->ib_device, - ib_conn->login_req_dma, task->data_count, + iser_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - memcpy(ib_conn->login_req_buf, task->data, task->data_count); + memcpy(iser_conn->login_req_buf, task->data, task->data_count); ib_dma_sync_single_for_device(device->ib_device, - ib_conn->login_req_dma, task->data_count, + iser_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - tx_dsg->addr = ib_conn->login_req_dma; + tx_dsg->addr = 
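iser_signal_comp() above implements selective signaling for iser_send_command(): a static counter is bumped once per command, and only every ISER_SIGNAL_CMD_COUNT-th send WR is posted with IB_SEND_SIGNALED, so the CQ sees one completion per batch rather than one per PDU. A minimal userspace model of the counter logic; ISER_SIGNAL_CMD_COUNT's actual value is not visible in this hunk, so the 32 below is an assumption:

#include <stdbool.h>
#include <stdio.h>

#define ISER_SIGNAL_CMD_COUNT 32 /* assumed; the real value is defined elsewhere */

static bool iser_signal_comp(int sig_count)
{
    return (sig_count % ISER_SIGNAL_CMD_COUNT) == 0; /* same test as the driver */
}

int main(void)
{
    static unsigned sig_count; /* mirrors the static counter in iser_send_command() */
    int signaled = 0;

    for (int i = 0; i < 1000; i++)
        if (iser_signal_comp(++sig_count))
            signaled++; /* this WR would carry IB_SEND_SIGNALED */

    printf("%d of 1000 sends signaled\n", signaled); /* 31 with the assumed value */
    return 0;
}

Data-outs and control PDUs in the surrounding hunks still pass signal = true, which keeps signaled sends in the stream so unsignaled descriptors are reaped in bounded time.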
iser_conn->login_req_dma; tx_dsg->length = task->data_count; tx_dsg->lkey = device->mr->lkey; mdesc->num_sge = 2; @@ -542,7 +554,7 @@ int iser_send_control(struct iscsi_conn *conn, if (task == conn->login_task) { iser_dbg("op %x dsl %lx, posting login rx buffer\n", task->hdr->opcode, data_seg_len); - err = iser_post_recvl(ib_conn); + err = iser_post_recvl(iser_conn); if (err) goto send_control_error; err = iser_post_rx_bufs(conn, task->hdr); @@ -550,7 +562,7 @@ int iser_send_control(struct iscsi_conn *conn, goto send_control_error; } - err = iser_post_send(ib_conn, mdesc); + err = iser_post_send(&iser_conn->ib_conn, mdesc, true); if (!err) return 0; @@ -564,15 +576,17 @@ send_control_error: */ void iser_rcv_completion(struct iser_rx_desc *rx_desc, unsigned long rx_xfer_len, - struct iser_conn *ib_conn) + struct ib_conn *ib_conn) { + struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, + ib_conn); struct iscsi_hdr *hdr; u64 rx_dma; int rx_buflen, outstanding, count, err; /* differentiate between login to all other PDUs */ - if ((char *)rx_desc == ib_conn->login_resp_buf) { - rx_dma = ib_conn->login_resp_dma; + if ((char *)rx_desc == iser_conn->login_resp_buf) { + rx_dma = iser_conn->login_resp_dma; rx_buflen = ISER_RX_LOGIN_SIZE; } else { rx_dma = rx_desc->dma_addr; @@ -580,14 +594,14 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, } ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + rx_buflen, DMA_FROM_DEVICE); hdr = &rx_desc->iscsi_header; iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); - iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data, + iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN); ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, @@ -599,21 +613,21 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, * for the posted rx bufs refcount to become zero handles everything */ ib_conn->post_recv_buf_count--; - if (rx_dma == ib_conn->login_resp_dma) + if (rx_dma == iser_conn->login_resp_dma) return; outstanding = ib_conn->post_recv_buf_count; - if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) { - count = min(ib_conn->qp_max_recv_dtos - outstanding, - ib_conn->min_posted_rx); - err = iser_post_recvm(ib_conn, count); + if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { + count = min(iser_conn->qp_max_recv_dtos - outstanding, + iser_conn->min_posted_rx); + err = iser_post_recvm(iser_conn, count); if (err) iser_err("posting %d rx bufs err %d\n", count, err); } } void iser_snd_completion(struct iser_tx_desc *tx_desc, - struct iser_conn *ib_conn) + struct ib_conn *ib_conn) { struct iscsi_task *task; struct iser_device *device = ib_conn->device; @@ -625,8 +639,6 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc, tx_desc = NULL; } - atomic_dec(&ib_conn->post_send_buf_count); - if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ task = (void *) ((long)(void *)tx_desc - @@ -658,7 +670,7 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - struct iser_device *device = iser_task->ib_conn->device; + struct iser_device *device = iser_task->iser_conn->ib_conn.device; int is_rdma_data_aligned = 1; int is_rdma_prot_aligned = 1; int prot_count = scsi_prot_sg_count(iser_task->sc); diff --git 
a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 47acd3ad3a17..6c5ce357fba6 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -49,7 +49,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct iser_data_buf *data_copy, enum iser_data_dir cmd_dir) { - struct ib_device *dev = iser_task->ib_conn->device->ib_device; + struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device; struct scatterlist *sgl = (struct scatterlist *)data->buf; struct scatterlist *sg; char *mem = NULL; @@ -116,7 +116,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct ib_device *dev; unsigned long cmd_data_len; - dev = iser_task->ib_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn.device->ib_device; ib_dma_unmap_sg(dev, &data_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? @@ -322,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct ib_device *dev; iser_task->dir[iser_dir] = 1; - dev = iser_task->ib_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn.device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); if (data->dma_nents == 0) { @@ -337,7 +337,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, { struct ib_device *dev; - dev = iser_task->ib_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn.device->ib_device; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } @@ -348,7 +348,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, int aligned_len) { - struct iscsi_conn *iscsi_conn = iser_task->ib_conn->iscsi_conn; + struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; iscsi_conn->fmr_unalign_cnt++; iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", @@ -377,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_conn *ib_conn = iser_task->ib_conn; + struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; @@ -432,7 +432,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, ib_conn->fmr.page_vec->offset); for (i = 0; i < ib_conn->fmr.page_vec->length; i++) iser_err("page_vec[%d] = 0x%llx\n", i, - (unsigned long long) ib_conn->fmr.page_vec->pages[i]); + (unsigned long long)ib_conn->fmr.page_vec->pages[i]); } if (err) return err; @@ -440,77 +440,74 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, return 0; } -static inline enum ib_t10_dif_type -scsi2ib_prot_type(unsigned char prot_type) +static inline void +iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs, + struct ib_sig_domain *domain) { - switch (prot_type) { - case SCSI_PROT_DIF_TYPE0: - return IB_T10DIF_NONE; - case SCSI_PROT_DIF_TYPE1: - return IB_T10DIF_TYPE1; - case SCSI_PROT_DIF_TYPE2: - return IB_T10DIF_TYPE2; - case SCSI_PROT_DIF_TYPE3: - return IB_T10DIF_TYPE3; - default: - return IB_T10DIF_NONE; - } -} - + domain->sig_type = IB_SIG_TYPE_T10_DIF; + domain->sig.dif.pi_interval = sc->device->sector_size; + domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff; + /* + * At the moment we hard code those, but in the future + * we will take them from sc. 
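The iser_set_dif_domain() refactor beginning here drops the scsi2ib_prot_type() lookup in favour of one helper that fills whichever ib_sig_domain actually carries T10-DIF protection; each caller then marks the other side IB_SIG_TYPE_NONE. A compilable sketch of that shape using simplified stand-in types (these are not the verbs definitions, just enough structure to show the split):

#include <stdint.h>
#include <string.h>

enum sig_type { SIG_TYPE_NONE, SIG_TYPE_T10_DIF };

struct sig_domain {
    enum sig_type sig_type;
    uint32_t pi_interval;
    uint32_t ref_tag;
    uint16_t apptag_check_mask;
};

struct sig_attrs {
    struct sig_domain mem;
    struct sig_domain wire;
};

void set_dif_domain(struct sig_domain *d, uint32_t sector_size, uint64_t lba)
{
    d->sig_type = SIG_TYPE_T10_DIF;
    d->pi_interval = sector_size;
    d->ref_tag = lba & 0xffffffff;
    d->apptag_check_mask = 0xffff; /* hard-coded for now, as the driver comment notes */
}

/* WRITE_INSERT/READ_STRIP: protection exists on the wire side only */
void set_insert_strip(struct sig_attrs *sa, uint32_t sector_size, uint64_t lba)
{
    memset(sa, 0, sizeof(*sa));
    sa->mem.sig_type = SIG_TYPE_NONE;
    set_dif_domain(&sa->wire, sector_size, lba);
}

int main(void)
{
    struct sig_attrs sa;

    set_insert_strip(&sa, 512, 0x12345678abcdULL);
    return sa.wire.ref_tag == 0x5678abcd ? 0 : 1; /* low 32 bits of the LBA */
}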
+ */ + domain->sig.dif.apptag_check_mask = 0xffff; + domain->sig.dif.app_escape = true; + domain->sig.dif.ref_escape = true; + if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 || + scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2) + domain->sig.dif.ref_remap = true; +}; static int iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) { - unsigned char scsi_ptype = scsi_get_prot_type(sc); - - sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; - sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; - sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size; - sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size; - switch (scsi_get_prot_op(sc)) { case SCSI_PROT_WRITE_INSERT: case SCSI_PROT_READ_STRIP: - sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; - sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; + iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire); sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & - 0xffffffff; break; case SCSI_PROT_READ_INSERT: case SCSI_PROT_WRITE_STRIP: - sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); - sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & - 0xffffffff; - sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; + iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); + /* + * At the moment we use this modparam to tell what is + * the memory bg_type, in the future we will take it + * from sc. + */ + sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM : + IB_T10DIF_CRC; break; case SCSI_PROT_READ_PASS: case SCSI_PROT_WRITE_PASS: - sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); - sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & - 0xffffffff; - sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire); sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & - 0xffffffff; + iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); + /* + * At the moment we use this modparam to tell what is + * the memory bg_type, in the future we will take it + * from sc. + */ + sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? 
IB_T10DIF_CSUM : + IB_T10DIF_CRC; break; default: iser_err("Unsupported PI operation %d\n", scsi_get_prot_op(sc)); return -EINVAL; } + return 0; } - static int iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) { switch (scsi_get_prot_type(sc)) { case SCSI_PROT_DIF_TYPE0: - *mask = 0x0; break; case SCSI_PROT_DIF_TYPE1: case SCSI_PROT_DIF_TYPE2: @@ -533,7 +530,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, struct fast_reg_descriptor *desc, struct ib_sge *data_sge, struct ib_sge *prot_sge, struct ib_sge *sig_sge) { - struct iser_conn *ib_conn = iser_task->ib_conn; + struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; struct iser_pi_context *pi_ctx = desc->pi_ctx; struct ib_send_wr sig_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; @@ -609,7 +606,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct ib_sge *sge) { struct fast_reg_descriptor *desc = regd_buf->reg.mem_h; - struct iser_conn *ib_conn = iser_task->ib_conn; + struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct ib_mr *mr; @@ -700,7 +697,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_conn *ib_conn = iser_task->ib_conn; + struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ea01075f9f9b..67225bb82bb5 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -39,8 +39,12 @@ #include "iscsi_iser.h" #define ISCSI_ISER_MAX_CONN 8 -#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) -#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) +#define ISER_MAX_RX_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) +#define ISER_MAX_TX_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) +#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \ + ISCSI_ISER_MAX_CONN) + +static int iser_cq_poll_limit = 512; static void iser_cq_tasklet_fn(unsigned long data); static void iser_cq_callback(struct ib_cq *cq, void *cq_context); @@ -71,9 +75,8 @@ static void iser_event_handler(struct ib_event_handler *handler, */ static int iser_create_device_ib_res(struct iser_device *device) { - struct iser_cq_desc *cq_desc; struct ib_device_attr *dev_attr = &device->dev_attr; - int ret, i, j; + int ret, i; ret = ib_query_device(device->ib_device, dev_attr); if (ret) { @@ -101,47 +104,35 @@ static int iser_create_device_ib_res(struct iser_device *device) return -1; } - device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); + device->comps_used = min(ISER_MAX_CQ, + device->ib_device->num_comp_vectors); iser_info("using %d CQs, device %s supports %d vectors\n", - device->cqs_used, device->ib_device->name, + device->comps_used, device->ib_device->name, device->ib_device->num_comp_vectors); - device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used, - GFP_KERNEL); - if (device->cq_desc == NULL) - goto cq_desc_err; - cq_desc = device->cq_desc; - device->pd = ib_alloc_pd(device->ib_device); if (IS_ERR(device->pd)) goto pd_err; - for (i = 0; i < device->cqs_used; i++) { - cq_desc[i].device = device; - cq_desc[i].cq_index = i; - - 
device->rx_cq[i] = ib_create_cq(device->ib_device, - iser_cq_callback, - iser_cq_event_callback, - (void *)&cq_desc[i], - ISER_MAX_RX_CQ_LEN, i); - if (IS_ERR(device->rx_cq[i])) - goto cq_err; - - device->tx_cq[i] = ib_create_cq(device->ib_device, - NULL, iser_cq_event_callback, - (void *)&cq_desc[i], - ISER_MAX_TX_CQ_LEN, i); - - if (IS_ERR(device->tx_cq[i])) + for (i = 0; i < device->comps_used; i++) { + struct iser_comp *comp = &device->comps[i]; + + comp->device = device; + comp->cq = ib_create_cq(device->ib_device, + iser_cq_callback, + iser_cq_event_callback, + (void *)comp, + ISER_MAX_CQ_LEN, i); + if (IS_ERR(comp->cq)) { + comp->cq = NULL; goto cq_err; + } - if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP)) + if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP)) goto cq_err; - tasklet_init(&device->cq_tasklet[i], - iser_cq_tasklet_fn, - (unsigned long)&cq_desc[i]); + tasklet_init(&comp->tasklet, iser_cq_tasklet_fn, + (unsigned long)comp); } device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | @@ -160,19 +151,17 @@ static int iser_create_device_ib_res(struct iser_device *device) handler_err: ib_dereg_mr(device->mr); dma_mr_err: - for (j = 0; j < device->cqs_used; j++) - tasklet_kill(&device->cq_tasklet[j]); + for (i = 0; i < device->comps_used; i++) + tasklet_kill(&device->comps[i].tasklet); cq_err: - for (j = 0; j < i; j++) { - if (device->tx_cq[j]) - ib_destroy_cq(device->tx_cq[j]); - if (device->rx_cq[j]) - ib_destroy_cq(device->rx_cq[j]); + for (i = 0; i < device->comps_used; i++) { + struct iser_comp *comp = &device->comps[i]; + + if (comp->cq) + ib_destroy_cq(comp->cq); } ib_dealloc_pd(device->pd); pd_err: - kfree(device->cq_desc); -cq_desc_err: iser_err("failed to allocate an IB resource\n"); return -1; } @@ -186,20 +175,18 @@ static void iser_free_device_ib_res(struct iser_device *device) int i; BUG_ON(device->mr == NULL); - for (i = 0; i < device->cqs_used; i++) { - tasklet_kill(&device->cq_tasklet[i]); - (void)ib_destroy_cq(device->tx_cq[i]); - (void)ib_destroy_cq(device->rx_cq[i]); - device->tx_cq[i] = NULL; - device->rx_cq[i] = NULL; + for (i = 0; i < device->comps_used; i++) { + struct iser_comp *comp = &device->comps[i]; + + tasklet_kill(&comp->tasklet); + ib_destroy_cq(comp->cq); + comp->cq = NULL; } (void)ib_unregister_event_handler(&device->event_handler); (void)ib_dereg_mr(device->mr); (void)ib_dealloc_pd(device->pd); - kfree(device->cq_desc); - device->mr = NULL; device->pd = NULL; } @@ -209,7 +196,7 @@ static void iser_free_device_ib_res(struct iser_device *device) * * returns 0 on success, or errno code on failure */ -int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max) { struct iser_device *device = ib_conn->device; struct ib_fmr_pool_param params; @@ -259,7 +246,7 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) /** * iser_free_fmr_pool - releases the FMR pool and page vec */ -void iser_free_fmr_pool(struct iser_conn *ib_conn) +void iser_free_fmr_pool(struct ib_conn *ib_conn) { iser_info("freeing conn %p fmr pool %p\n", ib_conn, ib_conn->fmr.pool); @@ -363,10 +350,10 @@ fast_reg_mr_failure: * for fast registration work requests. 
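With the old tx/rx CQ pair merged into a single CQ per completion vector, the CQ depth has to absorb completions from both directions plus one extra entry per connection; a plausible reading is that the extra slot covers the beacon send this patch introduces, though the hunk itself does not say. The sizing arithmetic restated as a standalone check; only ISCSI_ISER_MAX_CONN = 8 is visible here, the two DTOS depths are assumed:

#include <stdio.h>

#define ISCSI_ISER_MAX_CONN   8   /* from this hunk */
#define ISER_QP_MAX_RECV_DTOS 512 /* assumed */
#define ISER_QP_MAX_REQ_DTOS  512 /* assumed */

#define ISER_MAX_RX_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + ISCSI_ISER_MAX_CONN)

int main(void)
{
    printf("one shared CQ must hold up to %d entries\n", ISER_MAX_CQ_LEN);
    return 0;
}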
* returns 0 on success, or errno code on failure */ -int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) +int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max) { - struct iser_device *device = ib_conn->device; - struct fast_reg_descriptor *desc; + struct iser_device *device = ib_conn->device; + struct fast_reg_descriptor *desc; int i, ret; INIT_LIST_HEAD(&ib_conn->fastreg.pool); @@ -402,7 +389,7 @@ err: /** * iser_free_fastreg_pool - releases the pool of fast_reg descriptors */ -void iser_free_fastreg_pool(struct iser_conn *ib_conn) +void iser_free_fastreg_pool(struct ib_conn *ib_conn) { struct fast_reg_descriptor *desc, *tmp; int i = 0; @@ -436,7 +423,7 @@ void iser_free_fastreg_pool(struct iser_conn *ib_conn) * * returns 0 on success, -1 on failure */ -static int iser_create_ib_conn_res(struct iser_conn *ib_conn) +static int iser_create_ib_conn_res(struct ib_conn *ib_conn) { struct iser_device *device; struct ib_qp_init_attr init_attr; @@ -451,28 +438,30 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) mutex_lock(&ig.connlist_mutex); /* select the CQ with the minimal number of usages */ - for (index = 0; index < device->cqs_used; index++) - if (device->cq_active_qps[index] < - device->cq_active_qps[min_index]) + for (index = 0; index < device->comps_used; index++) { + if (device->comps[index].active_qps < + device->comps[min_index].active_qps) min_index = index; - device->cq_active_qps[min_index]++; + } + ib_conn->comp = &device->comps[min_index]; + ib_conn->comp->active_qps++; mutex_unlock(&ig.connlist_mutex); iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); init_attr.event_handler = iser_qp_event_callback; init_attr.qp_context = (void *)ib_conn; - init_attr.send_cq = device->tx_cq[min_index]; - init_attr.recv_cq = device->rx_cq[min_index]; + init_attr.send_cq = ib_conn->comp->cq; + init_attr.recv_cq = ib_conn->comp->cq; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; init_attr.cap.max_send_sge = 2; init_attr.cap.max_recv_sge = 1; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; if (ib_conn->pi_support) { - init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS; + init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; } else { - init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; + init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; } ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); @@ -491,33 +480,6 @@ out_err: } /** - * releases the QP objects, returns 0 on success, - * -1 on failure - */ -static int iser_free_ib_conn_res(struct iser_conn *ib_conn) -{ - int cq_index; - BUG_ON(ib_conn == NULL); - - iser_info("freeing conn %p cma_id %p qp %p\n", - ib_conn, ib_conn->cma_id, - ib_conn->qp); - - /* qp is created only once both addr & route are resolved */ - - if (ib_conn->qp != NULL) { - cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index; - ib_conn->device->cq_active_qps[cq_index]--; - - rdma_destroy_qp(ib_conn->cma_id); - } - - ib_conn->qp = NULL; - - return 0; -} - -/** * based on the resolved device node GUID see if there already allocated * device for this device. If there's no such, create one. 
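iser_create_ib_conn_res() in the hunk above spreads QPs across the completion contexts by scanning, under ig.connlist_mutex, for the comp with the fewest active QPs and charging the new QP to it. The argmin scan in isolation, with iser_comp reduced to a bare counter:

#include <stdio.h>

struct comp { int active_qps; };

int pick_least_loaded(struct comp *comps, int n)
{
    int min_index = 0;

    for (int i = 0; i < n; i++) /* same scan as the driver's loop */
        if (comps[i].active_qps < comps[min_index].active_qps)
            min_index = i;
    comps[min_index].active_qps++; /* charge the new QP to the winner */
    return min_index;
}

int main(void)
{
    struct comp comps[4] = { {3}, {1}, {2}, {1} };

    printf("chose vector %d\n", pick_least_loaded(comps, 4)); /* 1: first minimum wins */
    return 0;
}

On a tie the first minimum wins, which is why the scan starts from min_index = 0 and compares with strict less-than.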
 */
@@ -568,96 +530,174 @@ static void iser_device_try_release(struct iser_device *device)
     mutex_unlock(&ig.device_list_mutex);
 }

-static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
-                                     enum iser_ib_conn_state comp,
-                                     enum iser_ib_conn_state exch)
+/**
+ * Called with state mutex held
+ **/
+static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
+                                     enum iser_conn_state comp,
+                                     enum iser_conn_state exch)
 {
     int ret;

-    spin_lock_bh(&ib_conn->lock);
-    if ((ret = (ib_conn->state == comp)))
-        ib_conn->state = exch;
-    spin_unlock_bh(&ib_conn->lock);
+    ret = (iser_conn->state == comp);
+    if (ret)
+        iser_conn->state = exch;
+
     return ret;
 }

 void iser_release_work(struct work_struct *work)
 {
-    struct iser_conn *ib_conn;
+    struct iser_conn *iser_conn;

-    ib_conn = container_of(work, struct iser_conn, release_work);
+    iser_conn = container_of(work, struct iser_conn, release_work);

-    /* wait for .conn_stop callback */
-    wait_for_completion(&ib_conn->stop_completion);
+    /* Wait for conn_stop to complete */
+    wait_for_completion(&iser_conn->stop_completion);
+    /* Wait for IB resources cleanup to complete */
+    wait_for_completion(&iser_conn->ib_completion);

-    /* wait for the qp`s post send and post receive buffers to empty */
-    wait_event_interruptible(ib_conn->wait,
-                             ib_conn->state == ISER_CONN_DOWN);
+    mutex_lock(&iser_conn->state_mutex);
+    iser_conn->state = ISER_CONN_DOWN;
+    mutex_unlock(&iser_conn->state_mutex);

-    iser_conn_release(ib_conn);
+    iser_conn_release(iser_conn);
 }

 /**
- * Frees all conn objects and deallocs conn descriptor
+ * iser_free_ib_conn_res - release IB related resources
+ * @iser_conn: iser connection struct
+ * @destroy_device: indicator if we need to try to release
+ *      the iser device (only iscsi shutdown and DEVICE_REMOVAL
+ *      will use this).
+ *
+ * This routine is called with the iser state mutex held
+ * so the cm_id removal is out of here. It is safe to
+ * be invoked multiple times.
  */
-void iser_conn_release(struct iser_conn *ib_conn)
+static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
+                                  bool destroy_device)
 {
-    struct iser_device *device = ib_conn->device;
+    struct ib_conn *ib_conn = &iser_conn->ib_conn;
+    struct iser_device *device = ib_conn->device;
+
+    iser_info("freeing conn %p cma_id %p qp %p\n",
+              iser_conn, ib_conn->cma_id, ib_conn->qp);
+
+    iser_free_rx_descriptors(iser_conn);
+
+    if (ib_conn->qp != NULL) {
+        ib_conn->comp->active_qps--;
+        rdma_destroy_qp(ib_conn->cma_id);
+        ib_conn->qp = NULL;
+    }
+
+    if (destroy_device && device != NULL) {
+        iser_device_try_release(device);
+        ib_conn->device = NULL;
+    }
+}

-    BUG_ON(ib_conn->state == ISER_CONN_UP);
+/**
+ * Frees all conn objects and deallocs conn descriptor
+ */
+void iser_conn_release(struct iser_conn *iser_conn)
+{
+    struct ib_conn *ib_conn = &iser_conn->ib_conn;

     mutex_lock(&ig.connlist_mutex);
-    list_del(&ib_conn->conn_list);
+    list_del(&iser_conn->conn_list);
     mutex_unlock(&ig.connlist_mutex);

-    iser_free_rx_descriptors(ib_conn);
-    iser_free_ib_conn_res(ib_conn);
-    ib_conn->device = NULL;
-    /* on EVENT_ADDR_ERROR there's no device yet for this conn */
-    if (device != NULL)
-        iser_device_try_release(device);
-    /* if cma handler context, the caller actually destroy the id */
+
+    mutex_lock(&iser_conn->state_mutex);
+    if (iser_conn->state != ISER_CONN_DOWN)
+        iser_warn("iser conn %p state %d, expected state down.\n",
+                  iser_conn, iser_conn->state);
+    /*
+     * In case we never got to bind stage, we still need to
+     * release IB resources (which is safe to call more than once).
+     */
+    iser_free_ib_conn_res(iser_conn, true);
+    mutex_unlock(&iser_conn->state_mutex);
+
     if (ib_conn->cma_id != NULL) {
         rdma_destroy_id(ib_conn->cma_id);
         ib_conn->cma_id = NULL;
     }
-    iscsi_destroy_endpoint(ib_conn->ep);
+
+    kfree(iser_conn);
 }

 /**
  * triggers start of the disconnect procedures and wait for them to be done
+ * Called with state mutex held
  */
-void iser_conn_terminate(struct iser_conn *ib_conn)
+int iser_conn_terminate(struct iser_conn *iser_conn)
 {
+    struct ib_conn *ib_conn = &iser_conn->ib_conn;
+    struct ib_send_wr *bad_wr;
     int err = 0;

-    /* change the ib conn state only if the conn is UP, however always call
-     * rdma_disconnect since this is the only way to cause the CMA to change
-     * the QP state to ERROR
+    /* terminate the iser conn only if the conn state is UP */
+    if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
+                                   ISER_CONN_TERMINATING))
+        return 0;
+
+    iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);
+
+    /* suspend queuing of new iscsi commands */
+    if (iser_conn->iscsi_conn)
+        iscsi_suspend_queue(iser_conn->iscsi_conn);
+
+    /*
+     * In case we didn't already clean up the cma_id (peer initiated
+     * a disconnection), we need to cause the CMA to change the QP
+     * state to ERROR.
     */
+    if (ib_conn->cma_id) {
+        err = rdma_disconnect(ib_conn->cma_id);
+        if (err)
+            iser_err("Failed to disconnect, conn: 0x%p err %d\n",
+                     iser_conn, err);
+
+        /* post an indication that all flush errors were consumed */
+        err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+        if (err)
+            iser_err("conn %p failed to post beacon\n", ib_conn);
+
+        wait_for_completion(&ib_conn->flush_comp);
+    }

-    iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
-    err = rdma_disconnect(ib_conn->cma_id);
-    if (err)
-        iser_err("Failed to disconnect, conn: 0x%p err %d\n",
-                 ib_conn,err);
+    return 1;
 }

+/**
+ * Called with state mutex held
+ **/
 static void iser_connect_error(struct rdma_cm_id *cma_id)
 {
-    struct iser_conn *ib_conn;
-
-    ib_conn = (struct iser_conn *)cma_id->context;
+    struct iser_conn *iser_conn;

-    ib_conn->state = ISER_CONN_DOWN;
-    wake_up_interruptible(&ib_conn->wait);
+    iser_conn = (struct iser_conn *)cma_id->context;
+    iser_conn->state = ISER_CONN_DOWN;
 }

+/**
+ * Called with state mutex held
+ **/
 static void iser_addr_handler(struct rdma_cm_id *cma_id)
 {
     struct iser_device *device;
-    struct iser_conn *ib_conn;
+    struct iser_conn *iser_conn;
+    struct ib_conn *ib_conn;
     int ret;

+    iser_conn = (struct iser_conn *)cma_id->context;
+    if (iser_conn->state != ISER_CONN_PENDING)
+        /* bailout */
+        return;
+
+    ib_conn = &iser_conn->ib_conn;
     device = iser_device_find_by_ib_device(cma_id);
     if (!device) {
         iser_err("device lookup/creation failed\n");
@@ -665,7 +705,6 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
         return;
     }

-    ib_conn = (struct iser_conn *)cma_id->context;
     ib_conn->device = device;

     /* connection T10-PI support */
@@ -689,18 +728,28 @@
     }
 }

+/**
+ * Called with state mutex held
+ **/
 static void iser_route_handler(struct rdma_cm_id *cma_id)
 {
     struct rdma_conn_param conn_param;
     int ret;
     struct iser_cm_hdr req_hdr;
+    struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
+    struct ib_conn *ib_conn = &iser_conn->ib_conn;
+    struct iser_device *device = ib_conn->device;

-    ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
+    if (iser_conn->state != ISER_CONN_PENDING)
+        /* bailout */
+        return;
+
+    ret = iser_create_ib_conn_res(ib_conn);
     if (ret)
         goto failure;
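iser_conn_terminate() above drains the QP with a beacon: once rdma_disconnect() has pushed the QP toward the error state, one final send WR (its wr_id is ISER_BEACON_WRID, initialized in iser_connect() further down) is posted, and the terminating thread sleeps on flush_comp until the completion poller sees that WR come back flushed; the matching complete() appears in iser_handle_wc() later in this file. A runnable userspace analogue of the sentinel idea, using a queue and a condition variable (all names invented; build with -pthread):

#include <pthread.h>
#include <stdio.h>

#define BEACON (-1) /* stands in for ISER_BEACON_WRID */

static int queue[16], head, tail;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t flushed = PTHREAD_COND_INITIALIZER;
static int drained;

static void *poller(void *arg)
{
    (void)arg;
    for (;;) {
        pthread_mutex_lock(&lock);
        int wr = queue[head++];   /* "poll" the next flushed entry */
        if (wr == BEACON) {       /* everything posted earlier was consumed */
            drained = 1;
            pthread_cond_signal(&flushed); /* complete(&flush_comp) analogue */
            pthread_mutex_unlock(&lock);
            return NULL;
        }
        pthread_mutex_unlock(&lock);
    }
}

int main(void)
{
    pthread_t t;

    for (int i = 0; i < 5; i++)
        queue[tail++] = i; /* pretend these are in-flight work requests */
    queue[tail++] = BEACON; /* "post an indication that all flush errors were consumed" */

    pthread_create(&t, NULL, poller, NULL);
    pthread_mutex_lock(&lock);
    while (!drained)
        pthread_cond_wait(&flushed, &lock); /* wait_for_completion(&flush_comp) analogue */
    pthread_mutex_unlock(&lock);
    pthread_join(t, NULL);
    printf("every entry before the beacon was handled\n");
    return 0;
}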
memset(&conn_param, 0, sizeof conn_param); - conn_param.responder_resources = 4; + conn_param.responder_resources = device->dev_attr.max_qp_rd_atom; conn_param.initiator_depth = 1; conn_param.retry_count = 7; conn_param.rnr_retry_count = 6; @@ -724,47 +773,60 @@ failure: static void iser_connected_handler(struct rdma_cm_id *cma_id) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; struct ib_qp_attr attr; struct ib_qp_init_attr init_attr; + iser_conn = (struct iser_conn *)cma_id->context; + if (iser_conn->state != ISER_CONN_PENDING) + /* bailout */ + return; + (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); - ib_conn = (struct iser_conn *)cma_id->context; - if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP)) - wake_up_interruptible(&ib_conn->wait); + iser_conn->state = ISER_CONN_UP; + complete(&iser_conn->up_completion); } static void iser_disconnected_handler(struct rdma_cm_id *cma_id) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; - ib_conn = (struct iser_conn *)cma_id->context; - - /* getting here when the state is UP means that the conn is being * - * terminated asynchronously from the iSCSI layer's perspective. */ - if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, - ISER_CONN_TERMINATING)){ - if (ib_conn->iscsi_conn) - iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); + if (iser_conn_terminate(iser_conn)) { + if (iser_conn->iscsi_conn) + iscsi_conn_failure(iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); else iser_err("iscsi_iser connection isn't bound\n"); } - - /* Complete the termination process if no posts are pending */ - if (ib_conn->post_recv_buf_count == 0 && - (atomic_read(&ib_conn->post_send_buf_count) == 0)) { - ib_conn->state = ISER_CONN_DOWN; - wake_up_interruptible(&ib_conn->wait); - } } +static void iser_cleanup_handler(struct rdma_cm_id *cma_id, + bool destroy_device) +{ + struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; + + /* + * We are not guaranteed that we visited disconnected_handler + * by now, call it here to be safe that we handle CM drep + * and flush errors. + */ + iser_disconnected_handler(cma_id); + iser_free_ib_conn_res(iser_conn, destroy_device); + complete(&iser_conn->ib_completion); +}; + static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { + struct iser_conn *iser_conn; + int ret = 0; + + iser_conn = (struct iser_conn *)cma_id->context; iser_info("event %d status %d conn %p id %p\n", event->event, event->status, cma_id->context, cma_id); + mutex_lock(&iser_conn->state_mutex); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: iser_addr_handler(cma_id); @@ -783,89 +845,107 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve iser_connect_error(cma_id); break; case RDMA_CM_EVENT_DISCONNECTED: - case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_ADDR_CHANGE: iser_disconnected_handler(cma_id); break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + /* + * we *must* destroy the device as we cannot rely + * on iscsid to be around to initiate error handling. + * also implicitly destroy the cma_id. 
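Instead of the old hardcoded responder_resources = 4, the hunk above advertises the local HCA's full max_qp_rd_atom; the isert hunk later in this section does the complementary clamp, taking the peer's initiator_depth but bounding it with min_t against max_qp_init_rd_atom. The negotiation reduces to a min(), sketched standalone with made-up numbers:

#include <stdio.h>

/* illustrative only; real values come from ib_query_device() and the CM event */
unsigned char negotiate_rd_atom(unsigned char peer_initiator_depth,
                                unsigned char local_max_init_rd_atom)
{
    return peer_initiator_depth < local_max_init_rd_atom ?
           peer_initiator_depth : local_max_init_rd_atom;
}

int main(void)
{
    /* peer asks for 16 outstanding RDMA READs, the HCA can serve 4 */
    printf("granted depth: %u\n", negotiate_rd_atom(16, 4));
    return 0;
}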
+ */ + iser_cleanup_handler(cma_id, true); + iser_conn->ib_conn.cma_id = NULL; + ret = 1; + break; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + iser_cleanup_handler(cma_id, false); + break; default: iser_err("Unexpected RDMA CM event (%d)\n", event->event); break; } - return 0; + mutex_unlock(&iser_conn->state_mutex); + + return ret; } -void iser_conn_init(struct iser_conn *ib_conn) +void iser_conn_init(struct iser_conn *iser_conn) { - ib_conn->state = ISER_CONN_INIT; - init_waitqueue_head(&ib_conn->wait); - ib_conn->post_recv_buf_count = 0; - atomic_set(&ib_conn->post_send_buf_count, 0); - init_completion(&ib_conn->stop_completion); - INIT_LIST_HEAD(&ib_conn->conn_list); - spin_lock_init(&ib_conn->lock); + iser_conn->state = ISER_CONN_INIT; + iser_conn->ib_conn.post_recv_buf_count = 0; + init_completion(&iser_conn->ib_conn.flush_comp); + init_completion(&iser_conn->stop_completion); + init_completion(&iser_conn->ib_completion); + init_completion(&iser_conn->up_completion); + INIT_LIST_HEAD(&iser_conn->conn_list); + spin_lock_init(&iser_conn->ib_conn.lock); + mutex_init(&iser_conn->state_mutex); } /** * starts the process of connecting to the target * sleeps until the connection is established or rejected */ -int iser_connect(struct iser_conn *ib_conn, - struct sockaddr_in *src_addr, - struct sockaddr_in *dst_addr, +int iser_connect(struct iser_conn *iser_conn, + struct sockaddr *src_addr, + struct sockaddr *dst_addr, int non_blocking) { - struct sockaddr *src, *dst; + struct ib_conn *ib_conn = &iser_conn->ib_conn; int err = 0; - sprintf(ib_conn->name, "%pI4:%d", - &dst_addr->sin_addr.s_addr, dst_addr->sin_port); + mutex_lock(&iser_conn->state_mutex); + + sprintf(iser_conn->name, "%pISp", dst_addr); + + iser_info("connecting to: %s\n", iser_conn->name); /* the device is known only --after-- address resolution */ ib_conn->device = NULL; - iser_info("connecting to: %pI4, port 0x%x\n", - &dst_addr->sin_addr, dst_addr->sin_port); + iser_conn->state = ISER_CONN_PENDING; - ib_conn->state = ISER_CONN_PENDING; + ib_conn->beacon.wr_id = ISER_BEACON_WRID; + ib_conn->beacon.opcode = IB_WR_SEND; ib_conn->cma_id = rdma_create_id(iser_cma_handler, - (void *)ib_conn, - RDMA_PS_TCP, IB_QPT_RC); + (void *)iser_conn, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ib_conn->cma_id)) { err = PTR_ERR(ib_conn->cma_id); iser_err("rdma_create_id failed: %d\n", err); goto id_failure; } - src = (struct sockaddr *)src_addr; - dst = (struct sockaddr *)dst_addr; - err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000); + err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000); if (err) { iser_err("rdma_resolve_addr failed: %d\n", err); goto addr_failure; } if (!non_blocking) { - wait_event_interruptible(ib_conn->wait, - (ib_conn->state != ISER_CONN_PENDING)); + wait_for_completion_interruptible(&iser_conn->up_completion); - if (ib_conn->state != ISER_CONN_UP) { + if (iser_conn->state != ISER_CONN_UP) { err = -EIO; goto connect_failure; } } + mutex_unlock(&iser_conn->state_mutex); mutex_lock(&ig.connlist_mutex); - list_add(&ib_conn->conn_list, &ig.connlist); + list_add(&iser_conn->conn_list, &ig.connlist); mutex_unlock(&ig.connlist_mutex); return 0; id_failure: ib_conn->cma_id = NULL; addr_failure: - ib_conn->state = ISER_CONN_DOWN; + iser_conn->state = ISER_CONN_DOWN; connect_failure: - iser_conn_release(ib_conn); + mutex_unlock(&iser_conn->state_mutex); + iser_conn_release(iser_conn); return err; } @@ -874,7 +954,7 @@ connect_failure: * * returns: 0 on success, errno code on failure */ -int iser_reg_page_vec(struct 
iser_conn *ib_conn, +int iser_reg_page_vec(struct ib_conn *ib_conn, struct iser_page_vec *page_vec, struct iser_mem_reg *mem_reg) { @@ -944,7 +1024,8 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; - struct iser_conn *ib_conn = iser_task->ib_conn; + struct iser_conn *iser_conn = iser_task->iser_conn; + struct ib_conn *ib_conn = &iser_conn->ib_conn; struct fast_reg_descriptor *desc = reg->mem_h; if (!reg->is_mr) @@ -957,17 +1038,18 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, spin_unlock_bh(&ib_conn->lock); } -int iser_post_recvl(struct iser_conn *ib_conn) +int iser_post_recvl(struct iser_conn *iser_conn) { struct ib_recv_wr rx_wr, *rx_wr_failed; + struct ib_conn *ib_conn = &iser_conn->ib_conn; struct ib_sge sge; int ib_ret; - sge.addr = ib_conn->login_resp_dma; + sge.addr = iser_conn->login_resp_dma; sge.length = ISER_RX_LOGIN_SIZE; sge.lkey = ib_conn->device->mr->lkey; - rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf; + rx_wr.wr_id = (unsigned long)iser_conn->login_resp_buf; rx_wr.sg_list = &sge; rx_wr.num_sge = 1; rx_wr.next = NULL; @@ -981,20 +1063,21 @@ int iser_post_recvl(struct iser_conn *ib_conn) return ib_ret; } -int iser_post_recvm(struct iser_conn *ib_conn, int count) +int iser_post_recvm(struct iser_conn *iser_conn, int count) { struct ib_recv_wr *rx_wr, *rx_wr_failed; int i, ib_ret; - unsigned int my_rx_head = ib_conn->rx_desc_head; + struct ib_conn *ib_conn = &iser_conn->ib_conn; + unsigned int my_rx_head = iser_conn->rx_desc_head; struct iser_rx_desc *rx_desc; for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { - rx_desc = &ib_conn->rx_descs[my_rx_head]; + rx_desc = &iser_conn->rx_descs[my_rx_head]; rx_wr->wr_id = (unsigned long)rx_desc; rx_wr->sg_list = &rx_desc->rx_sg; rx_wr->num_sge = 1; rx_wr->next = rx_wr + 1; - my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask; + my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask; } rx_wr--; @@ -1006,7 +1089,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count) iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count -= count; } else - ib_conn->rx_desc_head = my_rx_head; + iser_conn->rx_desc_head = my_rx_head; return ib_ret; } @@ -1016,138 +1099,166 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count) * * returns 0 on success, -1 on failure */ -int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc) +int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, + bool signal) { int ib_ret; struct ib_send_wr send_wr, *send_wr_failed; ib_dma_sync_single_for_device(ib_conn->device->ib_device, - tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); + tx_desc->dma_addr, ISER_HEADERS_LEN, + DMA_TO_DEVICE); send_wr.next = NULL; send_wr.wr_id = (unsigned long)tx_desc; send_wr.sg_list = tx_desc->tx_sg; send_wr.num_sge = tx_desc->num_sge; send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = IB_SEND_SIGNALED; - - atomic_inc(&ib_conn->post_send_buf_count); + send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0; ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); - if (ib_ret) { + if (ib_ret) iser_err("ib_post_send failed, ret:%d\n", ib_ret); - atomic_dec(&ib_conn->post_send_buf_count); - } + return ib_ret; } -static void iser_handle_comp_error(struct iser_tx_desc *desc, - struct iser_conn *ib_conn) +/** + * is_iser_tx_desc - Indicate if the completion wr_id + * is a TX descriptor or not. 
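iser_post_recvm() above submits a whole batch of receives with one verbs call: the preallocated ib_recv_wr array is chained through ->next, the rx_wr-- step backs up to the last element so it can be NULL-terminated (the termination itself sits in unchanged context between the hunks), and a single ib_post_recv() takes the list. The chaining pattern with a local stand-in struct:

#include <stddef.h>
#include <stdio.h>

struct recv_wr { /* stand-in for struct ib_recv_wr */
    unsigned long wr_id;
    struct recv_wr *next;
};

int main(void)
{
    struct recv_wr wrs[8];
    struct recv_wr *wr = wrs;
    int count = 5;

    for (int i = 0; i < count; i++, wr++) {
        wr->wr_id = (unsigned long)i;
        wr->next = wr + 1; /* chain to the following element */
    }
    wr--;
    wr->next = NULL; /* terminate the list before posting it */

    for (wr = wrs; wr; wr = wr->next) /* what the provider would walk */
        printf("wr %lu\n", wr->wr_id);
    return 0;
}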
+ * @iser_conn: iser connection
+ * @wr_id: completion WR identifier
+ *
+ * Since we cannot rely on wc opcode in FLUSH errors
+ * we must work around it by checking if the wr_id address
+ * falls in the iser connection rx_descs buffer. If so
+ * it is an RX descriptor, otherwise it is a TX.
+ */
+static inline bool
+is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
+{
+    void *start = iser_conn->rx_descs;
+    int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
+
+    if (wr_id >= start && wr_id < start + len)
+        return false;
+
+    return true;
+}
+
+/**
+ * iser_handle_comp_error() - Handle error completion
+ * @ib_conn: connection RDMA resources
+ * @wc: work completion
+ *
+ * Notes: We may handle a FLUSH error completion and in this case
+ * we only cleanup in case TX type was DATAOUT. For non-FLUSH
+ * error completion we should also notify the iscsi layer that
+ * the connection failed (in case we passed bind stage).
+ */
+static void
+iser_handle_comp_error(struct ib_conn *ib_conn,
+                       struct ib_wc *wc)
 {
-    if (desc && desc->type == ISCSI_TX_DATAOUT)
-        kmem_cache_free(ig.desc_cache, desc);
-
-    if (ib_conn->post_recv_buf_count == 0 &&
-        atomic_read(&ib_conn->post_send_buf_count) == 0) {
-        /* getting here when the state is UP means that the conn is *
-         * being terminated asynchronously from the iSCSI layer's *
-         * perspective. */
-        if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
-            ISER_CONN_TERMINATING))
-            iscsi_conn_failure(ib_conn->iscsi_conn,
+    struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+                                               ib_conn);
+
+    if (wc->status != IB_WC_WR_FLUSH_ERR)
+        if (iser_conn->iscsi_conn)
+            iscsi_conn_failure(iser_conn->iscsi_conn,
                                ISCSI_ERR_CONN_FAILED);

-        /* no more non completed posts to the QP, complete the
-         * termination process w.o worrying on disconnect event */
-        ib_conn->state = ISER_CONN_DOWN;
-        wake_up_interruptible(&ib_conn->wait);
+    if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
+        struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
+
+        if (desc->type == ISCSI_TX_DATAOUT)
+            kmem_cache_free(ig.desc_cache, desc);
+    } else {
+        ib_conn->post_recv_buf_count--;
     }
 }

-static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
+/**
+ * iser_handle_wc - handle a single work completion
+ * @wc: work completion
+ *
+ * Soft-IRQ context, work completion can be either
+ * SEND or RECV, and can turn out successful or
+ * with error (or flush error).
+ */
+static void iser_handle_wc(struct ib_wc *wc)
 {
-    struct ib_cq *cq = device->tx_cq[cq_index];
-    struct ib_wc wc;
+    struct ib_conn *ib_conn;
     struct iser_tx_desc *tx_desc;
-    struct iser_conn *ib_conn;
-    int completed_tx = 0;
-
-    while (ib_poll_cq(cq, 1, &wc) == 1) {
-        tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
-        ib_conn = wc.qp->qp_context;
-        if (wc.status == IB_WC_SUCCESS) {
-            if (wc.opcode == IB_WC_SEND)
-                iser_snd_completion(tx_desc, ib_conn);
-            else
-                iser_err("expected opcode %d got %d\n",
-                         IB_WC_SEND, wc.opcode);
+    struct iser_rx_desc *rx_desc;
+
+    ib_conn = wc->qp->qp_context;
+    if (wc->status == IB_WC_SUCCESS) {
+        if (wc->opcode == IB_WC_RECV) {
+            rx_desc = (struct iser_rx_desc *)wc->wr_id;
+            iser_rcv_completion(rx_desc, wc->byte_len,
+                                ib_conn);
+        } else
+        if (wc->opcode == IB_WC_SEND) {
+            tx_desc = (struct iser_tx_desc *)wc->wr_id;
+            iser_snd_completion(tx_desc, ib_conn);
         } else {
-            iser_err("tx id %llx status %d vend_err %x\n",
-                     wc.wr_id, wc.status, wc.vendor_err);
-            if (wc.wr_id != ISER_FASTREG_LI_WRID) {
-                atomic_dec(&ib_conn->post_send_buf_count);
-                iser_handle_comp_error(tx_desc, ib_conn);
-            }
+            iser_err("Unknown wc opcode %d\n", wc->opcode);
         }
-        completed_tx++;
+    } else {
+        if (wc->status != IB_WC_WR_FLUSH_ERR)
+            iser_err("wr id %llx status %d vend_err %x\n",
+                     wc->wr_id, wc->status, wc->vendor_err);
+        else
+            iser_dbg("flush error: wr id %llx\n", wc->wr_id);
+
+        if (wc->wr_id != ISER_FASTREG_LI_WRID &&
+            wc->wr_id != ISER_BEACON_WRID)
+            iser_handle_comp_error(ib_conn, wc);
+
+        /* complete in case all flush errors were consumed */
+        if (wc->wr_id == ISER_BEACON_WRID)
+            complete(&ib_conn->flush_comp);
     }
-    return completed_tx;
 }
-
+/**
+ * iser_cq_tasklet_fn - iSER completion polling loop
+ * @data: iSER completion context
+ *
+ * Soft-IRQ context, polling connection CQ until
+ * either CQ was empty or we exhausted polling budget
+ */
 static void iser_cq_tasklet_fn(unsigned long data)
 {
-    struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
-    struct iser_device *device = cq_desc->device;
-    int cq_index = cq_desc->cq_index;
-    struct ib_cq *cq = device->rx_cq[cq_index];
-    struct ib_wc wc;
-    struct iser_rx_desc *desc;
-    unsigned long xfer_len;
-    struct iser_conn *ib_conn;
-    int completed_tx, completed_rx = 0;
-
-    /* First do tx drain, so in a case where we have rx flushes and a successful
-     * tx completion we will still go through completion error handling.
- */ - completed_tx = iser_drain_tx_cq(device, cq_index); - - while (ib_poll_cq(cq, 1, &wc) == 1) { - desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; - BUG_ON(desc == NULL); - ib_conn = wc.qp->qp_context; - if (wc.status == IB_WC_SUCCESS) { - if (wc.opcode == IB_WC_RECV) { - xfer_len = (unsigned long)wc.byte_len; - iser_rcv_completion(desc, xfer_len, ib_conn); - } else - iser_err("expected opcode %d got %d\n", - IB_WC_RECV, wc.opcode); - } else { - if (wc.status != IB_WC_WR_FLUSH_ERR) - iser_err("rx id %llx status %d vend_err %x\n", - wc.wr_id, wc.status, wc.vendor_err); - ib_conn->post_recv_buf_count--; - iser_handle_comp_error(NULL, ib_conn); - } - completed_rx++; - if (!(completed_rx & 63)) - completed_tx += iser_drain_tx_cq(device, cq_index); + struct iser_comp *comp = (struct iser_comp *)data; + struct ib_cq *cq = comp->cq; + struct ib_wc *const wcs = comp->wcs; + int i, n, completed = 0; + + while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) { + for (i = 0; i < n; i++) + iser_handle_wc(&wcs[i]); + + completed += n; + if (completed >= iser_cq_poll_limit) + break; } - /* #warning "it is assumed here that arming CQ only once its empty" * - * " would not cause interrupts to be missed" */ + + /* + * It is assumed here that arming CQ only once its empty + * would not cause interrupts to be missed. + */ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); + iser_dbg("got %d completions\n", completed); } static void iser_cq_callback(struct ib_cq *cq, void *cq_context) { - struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context; - struct iser_device *device = cq_desc->device; - int cq_index = cq_desc->cq_index; + struct iser_comp *comp = cq_context; - tasklet_schedule(&device->cq_tasklet[cq_index]); + tasklet_schedule(&comp->tasklet); } u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index d4c7928a0f36..3effa931fce2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -586,17 +586,12 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) init_completion(&isert_conn->conn_wait); init_completion(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); - kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); spin_lock_init(&isert_conn->conn_lock); INIT_LIST_HEAD(&isert_conn->conn_fr_pool); cma_id->context = isert_conn; isert_conn->conn_cm_id = cma_id; - isert_conn->responder_resources = event->param.conn.responder_resources; - isert_conn->initiator_depth = event->param.conn.initiator_depth; - pr_debug("Using responder_resources: %u initiator_depth: %u\n", - isert_conn->responder_resources, isert_conn->initiator_depth); isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE, GFP_KERNEL); @@ -643,6 +638,12 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) goto out_rsp_dma_map; } + /* Set max inflight RDMA READ requests */ + isert_conn->initiator_depth = min_t(u8, + event->param.conn.initiator_depth, + device->dev_attr.max_qp_init_rd_atom); + pr_debug("Using initiator_depth: %u\n", isert_conn->initiator_depth); + isert_conn->conn_device = device; isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device); if (IS_ERR(isert_conn->conn_pd)) { @@ -746,7 +747,9 @@ isert_connect_release(struct isert_conn *isert_conn) static void 
isert_connected_handler(struct rdma_cm_id *cma_id) { - return; + struct isert_conn *isert_conn = cma_id->context; + + kref_get(&isert_conn->conn_kref); } static void @@ -798,7 +801,6 @@ isert_disconnect_work(struct work_struct *work) wake_up: complete(&isert_conn->conn_wait); - isert_put_conn(isert_conn); } static void @@ -2183,7 +2185,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_cmd->tx_desc.num_sge = 2; } - isert_init_send_wr(isert_conn, isert_cmd, send_wr, true); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -2607,58 +2609,45 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, return ret; } -static inline enum ib_t10_dif_type -se2ib_prot_type(enum target_prot_type prot_type) +static inline void +isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs, + struct ib_sig_domain *domain) { - switch (prot_type) { - case TARGET_DIF_TYPE0_PROT: - return IB_T10DIF_NONE; - case TARGET_DIF_TYPE1_PROT: - return IB_T10DIF_TYPE1; - case TARGET_DIF_TYPE2_PROT: - return IB_T10DIF_TYPE2; - case TARGET_DIF_TYPE3_PROT: - return IB_T10DIF_TYPE3; - default: - return IB_T10DIF_NONE; - } -} + domain->sig_type = IB_SIG_TYPE_T10_DIF; + domain->sig.dif.bg_type = IB_T10DIF_CRC; + domain->sig.dif.pi_interval = se_cmd->se_dev->dev_attrib.block_size; + domain->sig.dif.ref_tag = se_cmd->reftag_seed; + /* + * At the moment we hard code those, but if in the future + * the target core would like to use it, we will take it + * from se_cmd. + */ + domain->sig.dif.apptag_check_mask = 0xffff; + domain->sig.dif.app_escape = true; + domain->sig.dif.ref_escape = true; + if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT || + se_cmd->prot_type == TARGET_DIF_TYPE2_PROT) + domain->sig.dif.ref_remap = true; +}; static int isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) { - enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type); - - sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; - sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; - sig_attrs->mem.sig.dif.pi_interval = - se_cmd->se_dev->dev_attrib.block_size; - sig_attrs->wire.sig.dif.pi_interval = - se_cmd->se_dev->dev_attrib.block_size; - switch (se_cmd->prot_op) { case TARGET_PROT_DIN_INSERT: case TARGET_PROT_DOUT_STRIP: - sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; - sig_attrs->wire.sig.dif.type = ib_prot_type; - sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed; + sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; + isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire); break; case TARGET_PROT_DOUT_INSERT: case TARGET_PROT_DIN_STRIP: - sig_attrs->mem.sig.dif.type = ib_prot_type; - sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed; - sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; + isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem); break; case TARGET_PROT_DIN_PASS: case TARGET_PROT_DOUT_PASS: - sig_attrs->mem.sig.dif.type = ib_prot_type; - sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed; - sig_attrs->wire.sig.dif.type = ib_prot_type; - sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed; + isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire); + isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem); break; default: 
pr_err("Unsupported PI operation %d\n", se_cmd->prot_op); @@ -2882,7 +2871,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); isert_init_send_wr(isert_conn, isert_cmd, - &isert_cmd->tx_desc.send_wr, true); + &isert_cmd->tx_desc.send_wr, false); isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr; wr->send_wr_num += 1; } @@ -3067,7 +3056,6 @@ isert_rdma_accept(struct isert_conn *isert_conn) int ret; memset(&cp, 0, sizeof(struct rdma_conn_param)); - cp.responder_resources = isert_conn->responder_resources; cp.initiator_depth = isert_conn->initiator_depth; cp.retry_count = 7; cp.rnr_retry_count = 7; @@ -3152,7 +3140,7 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) accept_wait: ret = down_interruptible(&isert_np->np_sem); - if (max_accept > 5) + if (ret || max_accept > 5) return -ENODEV; spin_lock_bh(&np->np_thread_lock); @@ -3215,7 +3203,7 @@ static void isert_wait_conn(struct iscsi_conn *conn) pr_debug("isert_wait_conn: Starting \n"); mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->conn_cm_id) { + if (isert_conn->conn_cm_id && !isert_conn->disconnect) { pr_debug("Calling rdma_disconnect from isert_wait_conn\n"); rdma_disconnect(isert_conn->conn_cm_id); } @@ -3234,6 +3222,7 @@ static void isert_wait_conn(struct iscsi_conn *conn) wait_for_completion(&isert_conn->conn_wait_comp_err); wait_for_completion(&isert_conn->conn_wait); + isert_put_conn(isert_conn); } static void isert_free_conn(struct iscsi_conn *conn) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index e3c2c5b4297f..62d2a18e1b41 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); static struct scsi_transport_template *ib_srp_transport_template; +static struct workqueue_struct *srp_remove_wq; static struct ib_client srp_client = { .name = "srp", @@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target) spin_unlock_irq(&target->lock); if (changed) - queue_work(system_long_wq, &target->remove_work); + queue_work(srp_remove_wq, &target->remove_work); return changed; } @@ -1643,10 +1644,14 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) SCSI_SENSE_BUFFERSIZE)); } - if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER)) - scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); - else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) + if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER)) + scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER)) + scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt)); srp_free_req(target, req, scmnd, be32_to_cpu(rsp->req_lim_delta)); @@ -3261,9 +3266,10 @@ static void srp_remove_one(struct ib_device *device) spin_unlock(&host->target_lock); /* - * Wait for target port removal tasks. + * Wait for tl_err and target port removal tasks. 
*/ flush_workqueue(system_long_wq); + flush_workqueue(srp_remove_wq); kfree(host); } @@ -3313,16 +3319,22 @@ static int __init srp_init_module(void) indirect_sg_entries = cmd_sg_entries; } + srp_remove_wq = create_workqueue("srp_remove"); + if (!srp_remove_wq) { + ret = -ENOMEM; + goto out; + } + + ret = -ENOMEM; ib_srp_transport_template = srp_attach_transport(&ib_srp_transport_functions); if (!ib_srp_transport_template) - return -ENOMEM; + goto destroy_wq; ret = class_register(&srp_class); if (ret) { pr_err("couldn't register class infiniband_srp\n"); - srp_release_transport(ib_srp_transport_template); - return ret; + goto release_tr; } ib_sa_register_client(&srp_sa_client); @@ -3330,13 +3342,22 @@ static int __init srp_init_module(void) ret = ib_register_client(&srp_client); if (ret) { pr_err("couldn't register IB client\n"); - srp_release_transport(ib_srp_transport_template); - ib_sa_unregister_client(&srp_sa_client); - class_unregister(&srp_class); - return ret; + goto unreg_sa; } - return 0; +out: + return ret; + +unreg_sa: + ib_sa_unregister_client(&srp_sa_client); + class_unregister(&srp_class); + +release_tr: + srp_release_transport(ib_srp_transport_template); + +destroy_wq: + destroy_workqueue(srp_remove_wq); + goto out; } static void __exit srp_cleanup_module(void) @@ -3345,6 +3366,7 @@ static void __exit srp_cleanup_module(void) ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); srp_release_transport(ib_srp_transport_template); + destroy_workqueue(srp_remove_wq); } module_init(srp_init_module); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index fe09f2788b15..7206547c13ce 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -198,6 +198,7 @@ static void srpt_event_handler(struct ib_event_handler *handler, case IB_EVENT_PKEY_CHANGE: case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: + case IB_EVENT_GID_CHANGE: /* Refresh port data asynchronously. */ if (event->element.port_num <= sdev->device->phys_port_cnt) { sport = &sdev->port[event->element.port_num - 1]; @@ -563,7 +564,7 @@ static int srpt_refresh_port(struct srpt_port *sport) ®_req, 0, srpt_mad_send_handler, srpt_mad_recv_handler, - sport); + sport, 0); if (IS_ERR(sport->mad_agent)) { ret = PTR_ERR(sport->mad_agent); sport->mad_agent = NULL; @@ -3573,7 +3574,7 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) int ret, rc; p = name; - if (strnicmp(p, "0x", 2) == 0) + if (strncasecmp(p, "0x", 2) == 0) p += 2; ret = -EINVAL; len = strlen(p); |
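The reworked srp_init_module() above is the standard kernel goto-unwind ladder: each acquired resource gets a label, and a failure jumps to the label that releases everything obtained so far, in reverse acquisition order. A standalone, runnable distillation of the pattern (resource names invented; the final step is forced to fail so the unwind is visible):

#include <stdio.h>
#include <stdlib.h>

void *acquire(const char *name, int fail)
{
    printf("acquire %s\n", name);
    return fail ? NULL : malloc(1);
}

int init_pattern(void)
{
    void *wq, *transport, *client;
    int ret = -1;

    wq = acquire("workqueue", 0);
    if (!wq)
        goto out;
    transport = acquire("transport", 0);
    if (!transport)
        goto destroy_wq;
    client = acquire("ib client", 1); /* simulate the last step failing */
    if (!client)
        goto release_transport;
    return 0;

release_transport:
    free(transport);
    printf("released transport\n");
destroy_wq:
    free(wq);
    printf("destroyed workqueue\n");
out:
    return ret;
}

int main(void)
{
    printf("init returned %d\n", init_pattern());
    return 0;
}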